PyPI - fosslight-source - Versions diffs - 2.2.16__tar.gz → 2.3.0__tar.gz - Mend

fosslight-source 2.2.16.tar.gz → 2.3.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

{fosslight_source-2.2.16/src/fosslight_source.egg-info → fosslight_source-2.3.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fosslight_source
-Version: 2.2.16
+Version: 2.3.0
 Summary: FOSSLight Source Scanner
 Author: LG Electronics
 License-Expression: Apache-2.0
@@ -26,6 +26,7 @@ Requires-Dist: wheel>=0.38.1
 Requires-Dist: intbitset
 Requires-Dist: fosslight_binary>=5.1.22
 Requires-Dist: scancode-toolkit>=32.0.2
+Requires-Dist: cryptography<49; platform_system == "Darwin" and platform_machine == "x86_64"
 Requires-Dist: fingerprints==1.2.3
 Requires-Dist: normality==2.6.1
 Requires-Dist: psycopg2-binary>=2.9.10; python_version >= "3.13"

{fosslight_source-2.2.16 → fosslight_source-2.3.0}/pyproject.toml RENAMED Viewed

@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "fosslight_source"
-version = "2.2.16"
+version = "2.3.0"
 description = "FOSSLight Source Scanner"
 readme = "README.md"
 license = "Apache-2.0"
@@ -35,6 +35,8 @@ dependencies = [
     "intbitset",
     "fosslight_binary>=5.1.22",
     "scancode-toolkit>=32.0.2",
+    # cryptography 49.x does not provide macOS x86_64 wheels, causing source builds to require OpenSSL/pkg-config.
+    "cryptography<49; platform_system == 'Darwin' and platform_machine == 'x86_64'",
     "fingerprints==1.2.3",
     "normality==2.6.1",
     # Python 3.13+ needs psycopg2-binary 2.9.10+ (has wheels; 2.9.9 builds fail with _PyInterpreterState_Get)

fosslight_source-2.3.0/src/fosslight_source/_kb_client.py ADDED Viewed

@@ -0,0 +1,239 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2020 LG Electronics Inc.
+# SPDX-License-Identifier: Apache-2.0
+import json
+import logging
+import time
+import urllib.error
+import urllib.request
+from typing import Dict, List, NamedTuple, Optional
+import fosslight_util.constant as constant
+logger = logging.getLogger(constant.LOGGER_NAME)
+_SCAN_JOB_POLL_INTERVAL_SEC = 1.0
+_SCAN_JOB_POLL_MAX_INTERVAL_SEC = 10.0
+_SCAN_JOB_REQUEST_TIMEOUT_SEC = 30
+_SCAN_JOB_MIN_WAIT_SEC = 300
+_SCAN_JOB_PER_HASH_SEC = 35
+def _kb_request(
+    kb_url: str,
+    path: str,
+    *,
+    method: str = "GET",
+    payload: dict | None = None,
+    kb_token: str = "",
+    timeout: int = _SCAN_JOB_REQUEST_TIMEOUT_SEC,
+) -> dict:
+    data = None
+    if payload is not None:
+        data = json.dumps(payload).encode("utf-8")
+    request = urllib.request.Request(f"{kb_url.rstrip('/')}/{path.lstrip('/')}", data=data, method=method)
+    request.add_header("Accept", "application/json")
+    if payload is not None:
+        request.add_header("Content-Type", "application/json")
+    if kb_token:
+        request.add_header("Authorization", f"Bearer {kb_token}")
+    with urllib.request.urlopen(request, timeout=timeout) as response:
+        body = response.read().decode()
+        return json.loads(body) if body else {}
+def _estimate_job_wait_timeout(file_hash_count: int) -> float:
+    return float(max(_SCAN_JOB_MIN_WAIT_SEC, file_hash_count * _SCAN_JOB_PER_HASH_SEC))
+def _coerce_count(value, default: int) -> int:
+    if value is None:
+        return default
+    try:
+        count = int(value)
+    except (TypeError, ValueError):
+        return default
+    return count if count >= 0 else default
+def _extract_response_message(response_body: dict) -> Optional[str]:
+    message = response_body.get("message")
+    if isinstance(message, str):
+        message = message.strip()
+        if message:
+            return message
+    return None
+def _scan_job_failure_message(response_body: dict) -> Optional[str]:
+    """Return server message when a scan/jobs response indicates failure."""
+    message = _extract_response_message(response_body)
+    if not message:
+        return None
+    status = response_body.get("status")
+    if status is None or str(status).lower() == "failed":
+        return message
+    if not response_body.get("job_id"):
+        return message
+    return None
+def _parse_http_error_body(error: urllib.error.HTTPError) -> dict:
+    try:
+        raw = error.read().decode()
+        return json.loads(raw) if raw else {}
+    except (json.JSONDecodeError, UnicodeDecodeError, OSError):
+        return {}
+class KbScanJobResult(NamedTuple):
+    origin_urls: Dict[str, str]
+    failure_message: Optional[str]
+    requested_count: int
+    returned_count: int
+def _kb_scan_job_result(
+    origin_urls: Dict[str, str],
+    failure_message: Optional[str],
+    requested_count: int,
+) -> KbScanJobResult:
+    return KbScanJobResult(
+        origin_urls=origin_urls,
+        failure_message=failure_message,
+        requested_count=requested_count,
+        returned_count=len(origin_urls),
+    )
+def fetch_origin_urls_via_scan_job(
+    file_hashes: List[str],
+    kb_url: str,
+    kb_token: str,
+) -> KbScanJobResult:
+    """
+    Create a POST /scan/jobs request, poll until completion, and return a file_hash -> origin_url map.
+    :param file_hashes: list of MD5 file hashes to look up.
+    :param kb_url: KB API base URL.
+    :param kb_token: KB API bearer token.
+    :return: origin URLs, optional failure message, and requested/returned file_hash counts.
+    """
+    unique_hashes = list(dict.fromkeys(h for h in file_hashes if h))
+    requested_count = len(unique_hashes)
+    if not unique_hashes:
+        return _kb_scan_job_result({}, None, 0)
+    create_payload = {"file_hashes": unique_hashes}
+    try:
+        created = _kb_request(kb_url, "scan/jobs", method="POST", payload=create_payload, kb_token=kb_token)
+    except urllib.error.HTTPError as e:
+        failure_message = _scan_job_failure_message(_parse_http_error_body(e))
+        if failure_message:
+            logger.warning(f"KB scan job create failed: {failure_message}")
+            return _kb_scan_job_result({}, failure_message, requested_count)
+        logger.warning(f"KB scan job create failed: HTTP {e.code} {e.reason}")
+        return _kb_scan_job_result({}, None, requested_count)
+    except urllib.error.URLError as e:
+        logger.warning(f"KB scan job create failed: {e}")
+        return _kb_scan_job_result({}, None, requested_count)
+    except Exception as e:
+        logger.warning(f"KB scan job create failed: {e}")
+        return _kb_scan_job_result({}, None, requested_count)
+    failure_message = _scan_job_failure_message(created)
+    if failure_message:
+        logger.warning(f"KB scan job create failed: {failure_message}")
+        return _kb_scan_job_result({}, failure_message, requested_count)
+    if str(created.get("status", "")).lower() == "failed":
+        logger.warning("KB scan job create failed")
+        return _kb_scan_job_result({}, None, requested_count)
+    job_id = created.get("job_id", "")
+    if not job_id:
+        logger.warning("KB scan job create response missing job_id")
+        return _kb_scan_job_result({}, None, requested_count)
+    fallback_count = len(unique_hashes)
+    accepted = _coerce_count(
+        created.get("accepted"),
+        _coerce_count(created.get("total"), fallback_count),
+    )
+    skipped = _coerce_count(created.get("skipped"), 0)
+    logger.info(
+        f"KB scan job created: job_id={job_id}, total={created.get('total', fallback_count)}, "
+        f"accepted={accepted}, skipped={skipped}"
+    )
+    if skipped:
+        logger.warning(f"KB scan job rate-limited: {skipped} file_hash(es) skipped by server")
+    if accepted == 0:
+        failure_message = (
+            f"rate-limited: {skipped} file_hash(es) skipped by server"
+            if skipped
+            else "scan job accepted no file_hashes"
+        )
+        return _kb_scan_job_result({}, failure_message, requested_count)
+    deadline = time.monotonic() + _estimate_job_wait_timeout(accepted)
+    interval = _SCAN_JOB_POLL_INTERVAL_SEC
+    origin_urls: Dict[str, str] = {}
+    while time.monotonic() < deadline:
+        try:
+            status = _kb_request(kb_url, f"scan/jobs/{job_id}", kb_token=kb_token)
+        except urllib.error.HTTPError as e:
+            if e.code == 404:
+                logger.warning(f"KB scan job not found: {job_id}")
+                return _kb_scan_job_result(origin_urls, "scan job not found", requested_count)
+            failure_message = _scan_job_failure_message(_parse_http_error_body(e))
+            if failure_message:
+                logger.warning(f"KB scan job status failed: {failure_message}")
+                return _kb_scan_job_result(origin_urls, failure_message, requested_count)
+            logger.warning(f"KB scan job status failed: HTTP {e.code}")
+            time.sleep(interval)
+            interval = min(interval * 1.5, _SCAN_JOB_POLL_MAX_INTERVAL_SEC)
+            continue
+        except urllib.error.URLError as e:
+            logger.warning(f"KB scan job status failed: {e}")
+            time.sleep(interval)
+            interval = min(interval * 1.5, _SCAN_JOB_POLL_MAX_INTERVAL_SEC)
+            continue
+        except Exception as e:
+            logger.warning(f"KB scan job status parse failed: {e}")
+            time.sleep(interval)
+            interval = min(interval * 1.5, _SCAN_JOB_POLL_MAX_INTERVAL_SEC)
+            continue
+        job_status = status.get("status", "")
+        if job_status == "completed":
+            for row in status.get("results", []):
+                if not isinstance(row, dict):
+                    continue
+                file_hash = row.get("file_hash", "")
+                if row.get("success") and row.get("output") and file_hash:
+                    origin_urls[file_hash] = row["output"]
+            logger.info(
+                f"KB scan job completed: job_id={job_id}, "
+                f"matched={len(origin_urls)}, failed={status.get('failed', 0)}"
+            )
+            return _kb_scan_job_result(origin_urls, None, requested_count)
+        if job_status == "failed":
+            failure_message = _scan_job_failure_message(status)
+            if failure_message:
+                logger.warning(f"KB scan job failed: job_id={job_id}, message={failure_message}")
+            else:
+                logger.warning(f"KB scan job failed: job_id={job_id}")
+            return _kb_scan_job_result(origin_urls, failure_message or "scan job failed", requested_count)
+        time.sleep(interval)
+        interval = min(interval * 1.5, _SCAN_JOB_POLL_MAX_INTERVAL_SEC)
+    logger.warning(f"KB scan job timed out: job_id={job_id}")
+    return _kb_scan_job_result(origin_urls, "scan job timed out", requested_count)

{fosslight_source-2.2.16 → fosslight_source-2.3.0}/src/fosslight_source/_parsing_scancode_file_item.py RENAMED Viewed

@@ -15,7 +15,7 @@ from typing import Tuple
 logger = logging.getLogger(constant.LOGGER_NAME)
 REMOVE_LICENSE = ["warranty-disclaimer"]
-regex = re.compile(r'licenseref-(\S+)', re.IGNORECASE)
+regex = re.compile(r'licenseref-([a-z0-9\.\-]+)', re.IGNORECASE)
 find_word = re.compile(rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)", re.IGNORECASE)
 KEYWORD_SPDX_ID = r'SPDX-License-Identifier\s*[\S]+'
 KEYWORD_DOWNLOAD_LOC = r'DownloadLocation\s*[\S]+'

{fosslight_source-2.2.16 → fosslight_source-2.3.0}/src/fosslight_source/_scan_item.py RENAMED Viewed

@@ -6,11 +6,7 @@
 import os
 import logging
 import re
-import json
-import base64
 import hashlib
-import urllib.request
-import urllib.error
 import fosslight_util.constant as constant
 from fosslight_util.oss_item import FileItem, OssItem, get_checksum_sha1
@@ -63,8 +59,9 @@ class SourceItem(FileItem):
         self.oss_version = ""
         self.checksum = get_checksum_sha1(value)
-        self.kb_origin_url = ""  # URL from OSS KB (_get_origin_url_from_md5_hash)
+        self.kb_origin_url = ""  # URL from OSS KB
         self.kb_evidence = ""   # Evidence from KB API (exact_match or code snippet)
+        self._cached_kb_md5 = ""  # MD5 precomputed for KB lookup (set by _collect_kb_file_hashes)
     def __del__(self) -> None:
         pass
@@ -124,37 +121,18 @@ class SourceItem(FileItem):
             logger.debug(f"Failed to compute MD5 for {self.source_name_or_path}: {e}")
         return md5_hex, wfp
-    def _get_origin_url_from_md5_hash(
-        self, md5_hash: str, wfp: str = "", kb_url: str = DEFAULT_KB_URL, kb_token: str = ""
-    ) -> str:
-        """Return origin_url from KB API."""
-        try:
-            payload = {"file_hash": md5_hash}
-            if wfp and wfp.strip():
-                payload["wfp_base64"] = base64.b64encode(wfp.strip().encode("utf-8")).decode("ascii")
-            request = urllib.request.Request(
-                f"{kb_url}query", data=json.dumps(payload).encode('utf-8'), method='POST'
-            )
-            request.add_header('Accept', 'application/json')
-            request.add_header('Content-Type', 'application/json')
-            if kb_token:
-                request.add_header('Authorization', f'Bearer {kb_token}')
-            with urllib.request.urlopen(request, timeout=10) as response:
-                data = json.loads(response.read().decode())
-                if isinstance(data, dict):
-                    return_code = data.get('return_code', -1)
-                    if return_code == 0:
-                        output = data.get('output', '')
-                        if output:
-                            return output
-        except urllib.error.URLError as e:
-            logger.debug(f"Failed to fetch origin_url from API for MD5 hash {md5_hash}: {e}")
-        except json.JSONDecodeError as e:
-            logger.debug(f"Failed to parse API response for MD5 hash {md5_hash}: {e}")
-        except Exception as e:
-            logger.debug(f"Error getting origin_url for MD5 hash {md5_hash}: {e}")
-        return ""
+    def _apply_kb_origin_url(self, origin_url: str) -> tuple[str, str, str]:
+        """Apply KB origin URL and return (oss_name, oss_version, download_url)."""
+        self.kb_origin_url = origin_url
+        self.kb_evidence = "exact_match"
+        extracted_name, extracted_version, repo_url = self._extract_oss_info_from_url(origin_url)
+        if extracted_name:
+            self.oss_name = extracted_name
+        if extracted_version:
+            self.oss_version = extracted_version
+        download_url = repo_url if repo_url else origin_url
+        self.download_location = [download_url]
+        return self.oss_name, self.oss_version, download_url
     def _extract_oss_info_from_url(self, url: str) -> tuple:
         """
@@ -196,7 +174,9 @@ class SourceItem(FileItem):
             return "", "", ""
     def set_oss_item(
-        self, path_to_scan: str = "", run_kb: bool = False, kb_url: str = DEFAULT_KB_URL, kb_token: str = ""
+        self,
+        path_to_scan: str = "",
+        kb_origin_urls: dict[str, str] | None = None,
     ) -> None:
         self.oss_items = []
         if self.download_location:
@@ -207,21 +187,15 @@ class SourceItem(FileItem):
                 self.oss_items.append(item)
         else:
             item = OssItem(self.oss_name, self.oss_version, self.licenses)
-            if run_kb and not self.is_license_text:
-                md5_hash, wfp = self._get_hash(path_to_scan)
+            if kb_origin_urls and not self.is_license_text:
+                md5_hash = self._cached_kb_md5
+                if not md5_hash:
+                    md5_hash, _wfp = self._get_hash(path_to_scan)
                 if md5_hash:
-                    origin_url = self._get_origin_url_from_md5_hash(md5_hash, wfp, kb_url, kb_token)
+                    origin_url = kb_origin_urls.get(md5_hash, "")
                     if origin_url:
-                        self.kb_origin_url = origin_url
-                        self.kb_evidence = "exact_match"
-                        extracted_name, extracted_version, repo_url = self._extract_oss_info_from_url(origin_url)
-                        if extracted_name:
-                            self.oss_name = extracted_name
-                        if extracted_version:
-                            self.oss_version = extracted_version
-                        download_url = repo_url if repo_url else origin_url
-                        self.download_location = [download_url]
-                        item = OssItem(self.oss_name, self.oss_version, self.licenses, download_url)
+                        oss_name, oss_version, download_url = self._apply_kb_origin_url(origin_url)
+                        item = OssItem(oss_name, oss_version, self.licenses, download_url)
             item.copyright = "\n".join(self.copyright)
             item.comment = self.comment

{fosslight_source-2.2.16 → fosslight_source-2.3.0}/src/fosslight_source/cli.py RENAMED Viewed

@@ -25,12 +25,14 @@ from fosslight_util.exclude import get_excluded_paths
 from .run_scanoss import run_scanoss_py
 from .run_scanoss import get_scanoss_extra_info
 import yaml
+import tqdm
 import argparse
 from .run_spdx_extractor import get_spdx_downloads
 from .run_manifest_extractor import get_manifest_licenses
-from ._scan_item import SourceItem, resolve_kb_config
+from ._scan_item import SourceItem, resolve_kb_config, is_notice_file
+from ._kb_client import fetch_origin_urls_via_scan_job
 from fosslight_util.oss_item import ScannerItem
-from typing import Tuple
+from typing import Optional, Tuple
 from ._scan_item import is_manifest_file
 import shutil
@@ -330,11 +332,51 @@ def mark_oss_info_correction_files_as_excluded(scan_results: list) -> None:
             item.comment = OSS_INFO_CORRECTION_COMMENT
+def _collect_kb_file_hashes(
+    scancode_result: list,
+    path_to_scan: str,
+    excluded_files: set,
+    hide_progress: bool,
+) -> tuple[list[str], list[tuple[SourceItem, str]]]:
+    """Collect MD5 hashes from scancode results and walk targets, plus (extra_item, md5) candidates."""
+    file_hashes: list[str] = []
+    extra_candidates: list[tuple[SourceItem, str]] = []
+    for item in scancode_result:
+        if item.is_license_text or is_notice_file(item.source_name_or_path):
+            continue
+        md5_hash, _wfp = item._get_hash(path_to_scan)
+        if md5_hash:
+            item._cached_kb_md5 = md5_hash
+            file_hashes.append(md5_hash)
+    abs_path_to_scan = os.path.abspath(path_to_scan)
+    scancode_paths = {item.source_name_or_path for item in scancode_result}
+    files_to_scan = []
+    for root, _dirs, files in os.walk(path_to_scan):
+        for file in files:
+            files_to_scan.append(os.path.join(root, file))
+    for file_path in tqdm.tqdm(files_to_scan, desc="KB Hashing", disable=hide_progress):
+        rel_path = os.path.relpath(file_path, abs_path_to_scan).replace("\\", "/")
+        if rel_path in scancode_paths or rel_path in excluded_files or is_notice_file(file_path):
+            continue
+        extra_item = SourceItem(rel_path)
+        md5_hash, _wfp = extra_item._get_hash(path_to_scan)
+        if md5_hash:
+            extra_item._cached_kb_md5 = md5_hash
+            file_hashes.append(md5_hash)
+            extra_candidates.append((extra_item, md5_hash))
+    return file_hashes, extra_candidates
 def merge_results(
     scancode_result: list = [], scanoss_result: list = [], spdx_downloads: dict = {},
     path_to_scan: str = "", run_kb: bool = False, manifest_licenses: dict = {},
     excluded_files: set = None, hide_progress: bool = False, kb_url: str = "", kb_token: str = ""
-) -> list:
+) -> tuple[list, Optional[str], int, int]:
     """
     Merge scanner results and spdx parsing result.
@@ -346,7 +388,7 @@ def merge_results(
     :param excluded_files: set of relative paths to exclude from KB-only file discovery.
     :param kb_url: KB API base URL.
     :param kb_token: KB API bearer token.
-    :return merged_result: list of merged result in SourceItem.
+    :return: (merged_result, kb failure message, requested file_hash count, returned match count).
     """
     if excluded_files is None:
         excluded_files = set()
@@ -381,32 +423,34 @@ def merge_results(
                 new_result_item.is_manifest_file = True
                 scancode_result.append(new_result_item)
+    kb_origin_urls: dict[str, str] = {}
+    kb_status_message: Optional[str] = None
+    kb_requested_count = 0
+    kb_returned_count = 0
+    extra_candidates: list[tuple[SourceItem, str]] = []
+    if run_kb:
+        file_hashes, extra_candidates = _collect_kb_file_hashes(
+            scancode_result, path_to_scan, excluded_files, hide_progress
+        )
+        if file_hashes:
+            kb_result = fetch_origin_urls_via_scan_job(file_hashes, kb_url, kb_token)
+            kb_origin_urls = kb_result.origin_urls
+            kb_status_message = kb_result.failure_message
+            kb_requested_count = kb_result.requested_count
+            kb_returned_count = kb_result.returned_count
     for item in scancode_result:
-        item.set_oss_item(path_to_scan, run_kb, kb_url, kb_token)
+        item.set_oss_item(path_to_scan, kb_origin_urls=kb_origin_urls)
     # Add OSSItem for files in path_to_scan that are not in scancode_result
     # when KB returns an origin URL for their MD5 hash (skip excluded_files)
     if run_kb:
-        import tqdm
-        abs_path_to_scan = os.path.abspath(path_to_scan)
-        scancode_paths = {item.source_name_or_path for item in scancode_result}
-        files_to_scan = []
-        for root, _dirs, files in os.walk(path_to_scan):
-            for file in files:
-                files_to_scan.append(os.path.join(root, file))
-        for file_path in tqdm.tqdm(files_to_scan, desc="KB Scanning", disable=hide_progress):
-            rel_path = os.path.relpath(file_path, abs_path_to_scan).replace("\\", "/")
-            if rel_path in scancode_paths or rel_path in excluded_files:
-                continue
-            extra_item = SourceItem(rel_path)
-            extra_item.set_oss_item(path_to_scan, run_kb, kb_url, kb_token)
+        for extra_item, _md5_hash in extra_candidates:
+            extra_item.set_oss_item(path_to_scan, kb_origin_urls=kb_origin_urls)
             if extra_item.download_location:
                 scancode_result.append(extra_item)
-                scancode_paths.add(rel_path)
-    return scancode_result
+    return scancode_result, kb_status_message, kb_requested_count, kb_returned_count
 def run_scanners(
@@ -500,13 +544,20 @@ def run_scanners(
                 if not check_kb_server_reachable(kb_url, kb_token):
                     run_kb = False
                     run_kb_msg = f"KB({kb_url}) Unreachable"
-                else:
-                    run_kb_msg = f"KB({kb_url}) Enabled"
             spdx_downloads, manifest_licenses = metadata_collector(path_to_scan, excluded_files)
-            merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads,
-                                          path_to_scan, run_kb, manifest_licenses, excluded_files,
-                                          hide_progress, kb_url, kb_token)
+            merged_result, kb_status_message, kb_requested_count, kb_returned_count = merge_results(
+                scancode_result, scanoss_result, spdx_downloads,
+                path_to_scan, run_kb, manifest_licenses, excluded_files,
+                hide_progress, kb_url, kb_token,
+            )
+            if kb_status_message:
+                run_kb_msg = f"KB({kb_url}) {kb_status_message}"
+            elif run_kb and kb_requested_count > 0:
+                run_kb_msg = (
+                    f"KB({kb_url}) response : {kb_returned_count}/"
+                    f" requested: {kb_requested_count}"
+                )
             mark_oss_info_correction_files_as_excluded(merged_result)
             scan_item = create_report_file(start_time, merged_result, license_list, scanoss_result, selected_scanner,
                                            print_matched_text, output_path, output_files, output_extensions, correct_mode,

{fosslight_source-2.2.16 → fosslight_source-2.3.0}/src/fosslight_source/run_scancode.py RENAMED Viewed

@@ -16,7 +16,14 @@ from ._parsing_scancode_file_item import parsing_file_item
 from ._parsing_scancode_file_item import get_error_from_header
 from fosslight_util.output_format import check_output_formats_v2
 from fosslight_binary.binary_analysis import check_binary
-from typing import Tuple
+from fosslight_util.exclude import (
+    EXCLUDE_DIRECTORY,
+    EXCLUDE_FILE_EXTENSION,
+    EXCLUDE_FILENAME,
+    PACKAGE_DIRECTORY,
+)
+from commoncode.fileset import is_included
+from typing import Tuple, Iterable
 logger = logging.getLogger(constant.LOGGER_NAME)
 warnings.filterwarnings("ignore", category=FutureWarning)
@@ -56,6 +63,88 @@ def _apply_scancode_unset_workaround(kwargs: dict) -> None:
         logger.debug("scancode UNSET workaround skipped: %s", ex)
+def _default_scancode_coarse_ignore_patterns() -> frozenset:
+    """
+    Coarse ignore patterns aligned with fosslight_util.get_excluded_paths() rules.
+    Uses segment-style globs so scancode does not need one pattern per file.
+    """
+    patterns = {".*"}
+    for name in PACKAGE_DIRECTORY + EXCLUDE_DIRECTORY:
+        patterns.add(name)
+    for ext in EXCLUDE_FILE_EXTENSION:
+        patterns.add(f"*.{ext}")
+    for name in EXCLUDE_FILENAME:
+        patterns.add(name)
+    return frozenset(patterns)
+def _is_covered_by_coarse_ignore(rel_path: str, coarse_patterns: Iterable[str]) -> bool:
+    excludes = {pattern: "" for pattern in coarse_patterns}
+    return not is_included(rel_path, includes={}, excludes=excludes)
+def _add_path_to_exclude_pattern(
+    patterns: set,
+    exclude_path: str,
+    abs_path_to_scan: str,
+    coarse_patterns: frozenset,
+) -> None:
+    exclude_path_normalized = os.path.normpath(exclude_path).replace("\\", "/")
+    if exclude_path_normalized.endswith("/**"):
+        base_dir = exclude_path_normalized[:-3].rstrip("/")
+        if base_dir:
+            full_exclude_path = os.path.join(abs_path_to_scan, base_dir)
+            if os.path.isdir(full_exclude_path):
+                patterns.add(base_dir)
+                patterns.add(exclude_path_normalized)
+            else:
+                patterns.add(exclude_path_normalized)
+        else:
+            patterns.add(exclude_path_normalized)
+        return
+    has_glob_chars = any(char in exclude_path_normalized for char in ['*', '?', '['])
+    if has_glob_chars:
+        patterns.add(exclude_path_normalized)
+        return
+    full_exclude_path = os.path.join(abs_path_to_scan, exclude_path_normalized)
+    if os.path.isdir(full_exclude_path):
+        base_path = exclude_path_normalized.rstrip("/")
+        if base_path:
+            patterns.add(base_path)
+            patterns.add(f"{base_path}/**")
+        else:
+            patterns.add(exclude_path_normalized)
+    elif os.path.isfile(full_exclude_path):
+        if not _is_covered_by_coarse_ignore(exclude_path_normalized, coarse_patterns):
+            patterns.add(f"**/{exclude_path_normalized}")
+    else:
+        patterns.add(exclude_path_normalized)
+def _build_scancode_ignore_patterns(
+    path_to_exclude: list,
+    abs_path_to_scan: str,
+    binary_paths: list,
+) -> tuple:
+    coarse_patterns = _default_scancode_coarse_ignore_patterns()
+    patterns = set(coarse_patterns)
+    for path in path_to_exclude or []:
+        if os.path.isabs(path):
+            exclude_path = os.path.relpath(path, abs_path_to_scan)
+        else:
+            exclude_path = path
+        _add_path_to_exclude_pattern(patterns, exclude_path, abs_path_to_scan, coarse_patterns)
+    for rel_path in binary_paths:
+        patterns.add(f"**/{rel_path}")
+    return tuple(sorted(patterns))
 def run_scan(
     path_to_scan: str, output_file_name: str = "",
     _write_json_file: bool = False, num_cores: int = -1,
@@ -115,51 +204,8 @@ def run_scan(
                 pretty_params["path_to_scan"] = path_to_scan
                 pretty_params["path_to_exclude"] = path_to_exclude
                 pretty_params["output_file"] = output_file_name
-                total_files_to_excluded = []
-                binary_files_to_exclude = []
                 abs_path_to_scan = os.path.abspath(path_to_scan)
-                if path_to_exclude:
-                    for path in path_to_exclude:
-                        if os.path.isabs(path):
-                            exclude_path = os.path.relpath(path, abs_path_to_scan)
-                        else:
-                            exclude_path = path
-                        exclude_path_normalized = os.path.normpath(exclude_path).replace("\\", "/")
-                        if exclude_path_normalized.endswith("/**"):
-                            base_dir = exclude_path_normalized[:-3].rstrip("/")
-                            if base_dir:
-                                full_exclude_path = os.path.join(abs_path_to_scan, base_dir)
-                                if os.path.isdir(full_exclude_path):
-                                    total_files_to_excluded.append(base_dir)
-                                    total_files_to_excluded.append(exclude_path_normalized)
-                                else:
-                                    total_files_to_excluded.append(exclude_path_normalized)
-                            else:
-                                total_files_to_excluded.append(exclude_path_normalized)
-                        else:
-                            has_glob_chars = any(char in exclude_path_normalized for char in ['*', '?', '['])
-                            if not has_glob_chars:
-                                full_exclude_path = os.path.join(abs_path_to_scan, exclude_path_normalized)
-                                is_dir = os.path.isdir(full_exclude_path)
-                                is_file = os.path.isfile(full_exclude_path)
-                            else:
-                                is_dir = False
-                                is_file = False
-                            if is_dir:
-                                base_path = exclude_path_normalized.rstrip("/")
-                                if base_path:
-                                    total_files_to_excluded.append(base_path)
-                                    total_files_to_excluded.append(f"{base_path}/**")
-                                else:
-                                    total_files_to_excluded.append(exclude_path_normalized)
-                            elif is_file:
-                                total_files_to_excluded.append(f"**/{exclude_path_normalized}")
-                            else:
-                                total_files_to_excluded.append(exclude_path_normalized)
+                binary_paths = []
                 for root, _, files in os.walk(path_to_scan):
                     for name in files:
                         full_path = os.path.join(root, name)
@@ -170,15 +216,13 @@ def run_scan(
                             continue
                         rel_path = os.path.relpath(full_path, abs_path_to_scan)
                         rel_norm = os.path.normpath(rel_path).replace("\\", "/")
-                        binary_files_to_exclude.append(rel_norm)
+                        binary_paths.append(rel_norm)
                         logger.debug(f"Excluded binary from scancode: {rel_norm}")
-                all_excluded_for_scancode = list(excluded_files) + binary_files_to_exclude
-                if all_excluded_for_scancode:
-                    total_files_to_excluded.extend(f"**/{file_path}" for file_path in all_excluded_for_scancode)
-                total_files_to_excluded = sorted(list(set(total_files_to_excluded)))
-                ignore_tuple = tuple(total_files_to_excluded)
+                ignore_tuple = _build_scancode_ignore_patterns(
+                    path_to_exclude, abs_path_to_scan, binary_paths
+                )
+                logger.debug(f"Scancode ignore patterns: {len(ignore_tuple)}")
                 kwargs = {
                     "max_depth": 100,
@@ -197,9 +241,7 @@ def run_scan(
                     "ignore": ignore_tuple,
                     "quiet": hide_progress
                 }
                 _apply_scancode_unset_workaround(kwargs)
                 rc, results = cli.run_scan(path_to_scan, **kwargs)
                 if not rc:
                     msg = "Source code analysis failed."

{fosslight_source-2.2.16 → fosslight_source-2.3.0/src/fosslight_source.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fosslight_source
-Version: 2.2.16
+Version: 2.3.0
 Summary: FOSSLight Source Scanner
 Author: LG Electronics
 License-Expression: Apache-2.0
@@ -26,6 +26,7 @@ Requires-Dist: wheel>=0.38.1
 Requires-Dist: intbitset
 Requires-Dist: fosslight_binary>=5.1.22
 Requires-Dist: scancode-toolkit>=32.0.2
+Requires-Dist: cryptography<49; platform_system == "Darwin" and platform_machine == "x86_64"
 Requires-Dist: fingerprints==1.2.3
 Requires-Dist: normality==2.6.1
 Requires-Dist: psycopg2-binary>=2.9.10; python_version >= "3.13"

{fosslight_source-2.2.16 → fosslight_source-2.3.0}/src/fosslight_source.egg-info/SOURCES.txt RENAMED Viewed

@@ -4,6 +4,7 @@ README.md
 pyproject.toml
 src/fosslight_source/__init__.py
 src/fosslight_source/_help.py
+src/fosslight_source/_kb_client.py
 src/fosslight_source/_license_matched.py
 src/fosslight_source/_parsing_scancode_file_item.py
 src/fosslight_source/_parsing_scanoss_file.py

{fosslight_source-2.2.16 → fosslight_source-2.3.0}/src/fosslight_source.egg-info/requires.txt RENAMED Viewed

@@ -12,6 +12,9 @@ fingerprints==1.2.3
 normality==2.6.1
 tqdm
+[:platform_system == "Darwin" and platform_machine == "x86_64"]
+cryptography<49
 [:python_version < "3.11"]
 tomli