PyPI - fosslight-source - Versions diffs - 2.2.1__py3-none-any.whl → 2.2.3__py3-none-any.whl - Mend

fosslight-source 2.2.1 (py3-none-any.whl) → 2.2.3 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

fosslight_source/_parsing_scancode_file_item.py CHANGED Viewed

@@ -7,12 +7,10 @@ import os
 import logging
 import re
 import fosslight_util.constant as constant
-from fosslight_util.get_pom_license import get_license_from_pom
 from ._license_matched import MatchedLicense
 from ._scan_item import SourceItem
 from ._scan_item import replace_word
 from ._scan_item import is_notice_file
-from ._scan_item import is_manifest_file
 from typing import Tuple
 logger = logging.getLogger(constant.LOGGER_NAME)
@@ -181,35 +179,6 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
                     if len(license_detected) > 0:
                         result_item.licenses = license_detected
-                        detected_without_pom = []
-                        if is_manifest_file(file_path) and len(license_detected) > 0:
-                            result_item.is_manifest_file = True
-                            if file_path.endswith('.pom'):
-                                try:
-                                    pom_licenses = get_license_from_pom(pom_path=file_path, check_parent=False)
-                                    normalize_pom_licenses = []
-                                    if pom_licenses:
-                                        pom_license_list = pom_licenses.split(', ')
-                                        for pom_license in pom_license_list:
-                                            if pom_license not in license_detected:
-                                                for lic_matched_key, lic_info in license_list.items():
-                                                    if hasattr(lic_info, 'matched_text') and lic_info.matched_text:
-                                                        matched_txt = str(lic_info.matched_text).replace(',', '')
-                                                        if pom_license in matched_txt:
-                                                            normalize_pom_licenses.append(lic_info.license)
-                                                            break
-                                            else:
-                                                normalize_pom_licenses.append(pom_license)
-                                    detected_without_pom = list(set(license_detected) - set(normalize_pom_licenses))
-                                    if detected_without_pom:
-                                        result_item.comment = f"Detected: {', '.join(detected_without_pom)}"
-                                        result_item.licenses = []
-                                        result_item.licenses = normalize_pom_licenses
-                                        if not normalize_pom_licenses:
-                                            result_item.exclude = True
-                                except Exception as ex:
-                                    logger.info(f"Failed to extract license from POM {file_path}: {ex}")
                         # Remove copyright info for license text file of GPL family
                         if should_remove_copyright_for_gpl_license_text(license_detected, result_item.is_license_text):
                             logger.debug(f"Removing copyright for GPL family license text file: {file_path}")
@@ -217,7 +186,7 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
                         else:
                             result_item.copyright = copyright_value_list
-                        if len(license_expression_list) > 0 and not detected_without_pom:
+                        if len(license_expression_list) > 0:
                             license_expression_list = list(
                                 set(license_expression_list))
                             result_item.comment = ','.join(license_expression_list)
@@ -314,35 +283,6 @@ def parsing_scancode_32_later(
                     file.get("percentage_of_license_text", 0) > 90 and not is_source_file
                 )
-                detected_without_pom = []
-                if is_manifest_file(file_path) and len(license_detected) > 0:
-                    result_item.is_manifest_file = True
-                    if file_path.endswith('.pom'):
-                        try:
-                            pom_licenses = get_license_from_pom(pom_path=file_path, check_parent=False)
-                            normalize_pom_licenses = []
-                            if pom_licenses:
-                                pom_license_list = pom_licenses.split(', ')
-                                for pom_license in pom_license_list:
-                                    if pom_license not in license_detected:
-                                        for lic_matched_key, lic_info in license_list.items():
-                                            if hasattr(lic_info, 'matched_text') and lic_info.matched_text:
-                                                matched_txt = str(lic_info.matched_text).replace(',', '')
-                                                if pom_license in matched_txt:
-                                                    normalize_pom_licenses.append(lic_info.license)
-                                                    break
-                                    else:
-                                        normalize_pom_licenses.append(pom_license)
-                            detected_without_pom = list(set(license_detected) - set(normalize_pom_licenses))
-                            if detected_without_pom:
-                                result_item.comment = f"Detected: {', '.join(detected_without_pom)}"
-                                result_item.licenses = []
-                                result_item.licenses = normalize_pom_licenses
-                                if not normalize_pom_licenses:
-                                    result_item.exclude = True
-                        except Exception as ex:
-                            logger.info(f"Failed to extract license from POM {file_path}: {ex}")
                 # Remove copyright info for license text file of GPL family
                 if should_remove_copyright_for_gpl_license_text(license_detected, result_item.is_license_text):
                     logger.debug(f"Removing copyright for GPL family license text file: {file_path}")
@@ -350,7 +290,7 @@ def parsing_scancode_32_later(
                 else:
                     result_item.copyright = copyright_value_list
-                if len(license_detected) > 1 and not detected_without_pom:
+                if len(license_detected) > 1:
                     license_expression_spdx = file.get("detected_license_expression_spdx", "")
                     license_expression = file.get("detected_license_expression", "")
                     if license_expression_spdx:

fosslight_source/_scan_item.py CHANGED Viewed

@@ -18,7 +18,7 @@ replace_word = ["-only", "-old-style", "-or-later", "licenseref-scancode-", "lic
 _notice_filename = ['licen[cs]e[s]?', 'notice[s]?', 'legal', 'copyright[s]?', 'copying*', 'patent[s]?', 'unlicen[cs]e', 'eula',
                     '[a,l]?gpl[-]?[1-3]?[.,-,_]?[0-1]?', 'mit', 'bsd[-]?[0-4]?', 'bsd[-]?[0-4][-]?clause[s]?',
                     'apache[-,_]?[1-2]?[.,-,_]?[0-2]?']
-_manifest_filename = [r'.*\.pom$', r'package\.json$', r'setup\.py$', r'pubspec\.yaml$', r'.*\.podspec$', r'Cargo\.toml$']
+_manifest_filename = [r'.*\.pom$', r'package\.json$', r'setup\.py$', r'setup\.cfg$', r'.*\.podspec$', r'Cargo\.toml$']
 MAX_LICENSE_LENGTH = 200
 MAX_LICENSE_TOTAL_LENGTH = 600
 SUBSTRING_LICENSE_COMMENT = "Maximum character limit (License)"

fosslight_source/cli.py CHANGED Viewed

@@ -25,9 +25,13 @@ from .run_scanoss import get_scanoss_extra_info
 import yaml
 import argparse
 from .run_spdx_extractor import get_spdx_downloads
+from .run_manifest_extractor import get_manifest_licenses
 from ._scan_item import SourceItem, KB_URL
 from fosslight_util.oss_item import ScannerItem
 from typing import Tuple
+from ._scan_item import is_manifest_file
+import shutil
 SRC_SHEET_NAME = 'SRC_FL_Source'
 SCANOSS_HEADER = {SRC_SHEET_NAME: ['ID', 'Source Path', 'OSS Name',
@@ -37,9 +41,7 @@ MERGED_HEADER = {SRC_SHEET_NAME: ['ID', 'Source Path', 'OSS Name',
                                   'OSS Version', 'License', 'Download Location',
                                   'Homepage', 'Copyright Text', 'Exclude', 'Comment', 'license_reference']}
 SCANNER_TYPE = ['kb', 'scancode', 'scanoss', 'all']
-EXCLUDE_FILENAME = ["changelog", "config.guess", "config.sub", "changes", "ltmain.sh",
-                    "configure", "configure.ac", "depcomp", "compile", "missing", "Makefile"]
-EXCLUDE_FILE_EXTENSION = [".m4", ".in", ".po"]
 logger = logging.getLogger(constant.LOGGER_NAME)
 warnings.filterwarnings("ignore", category=FutureWarning)
@@ -135,7 +137,7 @@ def create_report_file(
     output_path: str = "", output_files: list = [],
     output_extensions: list = [], correct_mode: bool = True,
     correct_filepath: str = "", path_to_scan: str = "", path_to_exclude: list = [],
-    formats: list = [], api_limit_exceed: bool = False, files_count: int = 0
+    formats: list = [], api_limit_exceed: bool = False, files_count: int = 0, final_output_path: str = ""
 ) -> 'ScannerItem':
     """
     Create report files for given scanned result.
@@ -149,10 +151,7 @@ def create_report_file(
     sheet_list = {}
     _json_ext = ".json"
-    if output_path == "":
-        output_path = os.getcwd()
-    else:
-        output_path = os.path.abspath(output_path)
+    output_path = os.path.abspath(output_path)
     if not output_files:
         # If -o does not contains file name, set default name
@@ -196,15 +195,27 @@ def create_report_file(
     scan_item.set_cover_pathinfo(path_to_scan, path_to_exclude)
     scan_item.set_cover_comment(f"Scanned files: {files_count}")
-    if api_limit_exceed:
-        scan_item.set_cover_comment("(Some of) SCANOSS scan was skipped. (API limits being exceeded)")
-    if not merged_result:
+    if merged_result:
+        scan_item.set_cover_comment(f"Detected source : {len(merged_result)}")
+    else:
         if files_count < 1:
             scan_item.set_cover_comment("(No file detected.)")
         else:
             scan_item.set_cover_comment("(No OSS detected.)")
+    if api_limit_exceed:
+        scan_item.set_cover_comment("SCANOSS skipped (API limits)")
+    run_kb = True if selected_scanner in ['kb', 'all'] else False
+    if run_kb:
+        scan_item.set_cover_comment("KB Enabled" if check_kb_server_reachable() else "KB Unreachable")
+    display_mode = selected_scanner
+    if selected_scanner == "kb":
+        display_mode += ", scancode"
+    elif selected_scanner == "all":
+        display_mode = "kb, scancode, scanoss"
+    scan_item.set_cover_comment(f"Mode : {display_mode}")
     if merged_result:
         sheet_list = {}
         scan_item.append_file_items(merged_result, PKG_NAME)
@@ -240,12 +251,13 @@ def create_report_file(
         #     del sheet_list["scanoss_reference"]
         results.append(write_output_file(combined_path_and_file, output_extension, scan_item, extended_header, "", output_format))
     for success, msg, result_file in results:
+        final_result_file = result_file.replace(output_path, final_output_path)
         if success:
-            logger.info(f"Output file: {result_file}")
+            logger.info(f"Output file: {final_result_file}")
             for row in scan_item.get_cover_comment():
                 logger.info(row)
         else:
-            logger.error(f"Fail to generate result file {result_file}. msg:({msg})")
+            logger.error(f"Fail to generate result file {final_result_file}. msg:({msg})")
     return scan_item
@@ -265,7 +277,7 @@ def check_kb_server_reachable() -> bool:
 def merge_results(
     scancode_result: list = [], scanoss_result: list = [], spdx_downloads: dict = {},
-    path_to_scan: str = "", run_kb: bool = False
+    path_to_scan: str = "", run_kb: bool = False, manifest_licenses: dict = {}
 ) -> list:
     """
@@ -291,6 +303,19 @@ def merge_results(
                 new_result_item = SourceItem(file_name)
                 new_result_item.download_location = download_location
                 scancode_result.append(new_result_item)
+    if manifest_licenses:
+        for file_name, licenses in manifest_licenses.items():
+            if file_name in scancode_result:
+                merged_result_item = scancode_result[scancode_result.index(file_name)]
+                # overwrite existing detected licenses with manifest-provided licenses
+                merged_result_item.licenses = []  # clear existing licenses (setter clears when value falsy)
+                merged_result_item.licenses = licenses
+                merged_result_item.is_manifest_file = True
+            else:
+                new_result_item = SourceItem(file_name)
+                new_result_item.licenses = licenses
+                new_result_item.is_manifest_file = True
+                scancode_result.append(new_result_item)
     if run_kb and not check_kb_server_reachable():
         run_kb = False
     if run_kb:
@@ -310,7 +335,8 @@ def run_scanners(
     called_by_cli: bool = True, print_matched_text: bool = False,
     formats: list = [], time_out: int = 120,
     correct_mode: bool = True, correct_filepath: str = "",
-    selected_scanner: str = 'all', path_to_exclude: list = []
+    selected_scanner: str = 'all', path_to_exclude: list = [],
+    all_exclude_mode: tuple = ()
 ) -> Tuple[bool, str, 'ScannerItem', list, list]:
     """
     Run Scancode and scanoss.py for the given path.
@@ -341,6 +367,11 @@ def run_scanners(
     success, msg, output_path, output_files, output_extensions, formats = check_output_formats_v2(output_file_name, formats)
+    if output_path == "":
+        output_path = os.getcwd()
+    final_output_path = output_path
+    output_path = os.path.join(os.path.dirname(output_path), '.fosslight_temp')
     logger, result_log = init_log(os.path.join(output_path, f"fosslight_log_src_{start_time}.txt"),
                                   True, logging.INFO, logging.DEBUG, PKG_NAME, path_to_scan, path_to_exclude)
@@ -349,10 +380,18 @@ def run_scanners(
         print_matched_text = False
     if success:
-        path_to_exclude_with_filename = path_to_exclude + EXCLUDE_FILENAME
-        excluded_path_with_default_exclusion, excluded_path_without_dot, excluded_files, cnt_file_except_skipped = (
-            get_excluded_paths(path_to_scan, path_to_exclude_with_filename, EXCLUDE_FILE_EXTENSION))
-        logger.debug(f"Skipped paths: {excluded_path_with_default_exclusion}")
+        if all_exclude_mode and len(all_exclude_mode) == 4:
+            (excluded_path_with_default_exclusion,
+             excluded_path_without_dot,
+             excluded_files,
+             cnt_file_except_skipped) = all_exclude_mode
+        else:
+            path_to_exclude_with_filename = path_to_exclude
+            (excluded_path_with_default_exclusion,
+             excluded_path_without_dot,
+             excluded_files,
+             cnt_file_except_skipped) = get_excluded_paths(path_to_scan, path_to_exclude_with_filename)
+            logger.debug(f"Skipped paths: {excluded_path_with_default_exclusion}")
         if not selected_scanner:
             selected_scanner = 'all'
@@ -365,16 +404,17 @@ def run_scanners(
                                                                                       excluded_files)
         excluded_files = set(excluded_files) if excluded_files else set()
         if selected_scanner in ['scanoss', 'all']:
-            scanoss_result, api_limit_exceed = run_scanoss_py(path_to_scan, output_file_name, formats, True, write_json_file,
+            scanoss_result, api_limit_exceed = run_scanoss_py(path_to_scan, output_file_name, formats, True,
                                                               num_cores, excluded_path_with_default_exclusion, excluded_files)
         if selected_scanner in SCANNER_TYPE:
             run_kb = True if selected_scanner in ['kb', 'all'] else False
-            spdx_downloads = get_spdx_downloads(path_to_scan, excluded_files)
-            merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads, path_to_scan, run_kb)
+            spdx_downloads, manifest_licenses = metadata_collector(path_to_scan, excluded_files)
+            merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads,
+                                          path_to_scan, run_kb, manifest_licenses)
             scan_item = create_report_file(start_time, merged_result, license_list, scanoss_result, selected_scanner,
                                            print_matched_text, output_path, output_files, output_extensions, correct_mode,
                                            correct_filepath, path_to_scan, excluded_path_without_dot, formats,
-                                           api_limit_exceed, cnt_file_except_skipped)
+                                           api_limit_exceed, cnt_file_except_skipped, final_output_path)
         else:
             print_help_msg_source_scanner()
             result_log[RESULT_KEY] = "Unsupported scanner"
@@ -382,8 +422,48 @@ def run_scanners(
     else:
         result_log[RESULT_KEY] = f"Format error. {msg}"
         success = False
+    try:
+        shutil.copytree(output_path, final_output_path, dirs_exist_ok=True)
+        shutil.rmtree(output_path)
+    except Exception as ex:
+        logger.debug(f"Failed to move temp files: {ex}")
     return success, result_log.get(RESULT_KEY, ""), scan_item, license_list, scanoss_result
+def metadata_collector(path_to_scan: str, excluded_files: set) -> dict:
+    """
+    Collect metadata for merging.
+    - Traverse files with exclusions applied
+    - spdx_downloads: {rel_path: [download_urls]}
+    - manifest_licenses: {rel_path: [license_names]}
+    :return: (spdx_downloads, manifest_licenses)
+    """
+    abs_path_to_scan = os.path.abspath(path_to_scan)
+    spdx_downloads = {}
+    manifest_licenses = {}
+    for root, dirs, files in os.walk(path_to_scan):
+        for file in files:
+            file_path = os.path.join(root, file)
+            rel_path_file = os.path.relpath(file_path, abs_path_to_scan).replace('\\', '/')
+            if rel_path_file in excluded_files:
+                continue
+            downloads = get_spdx_downloads(file_path)
+            if downloads:
+                spdx_downloads[rel_path_file] = downloads
+            if is_manifest_file(file_path):
+                licenses = get_manifest_licenses(file_path)
+                if licenses:
+                    manifest_licenses[rel_path_file] = licenses
+    return spdx_downloads, manifest_licenses
 if __name__ == '__main__':
     main()

fosslight_source/run_manifest_extractor.py ADDED Viewed

@@ -0,0 +1,249 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2025 LG Electronics Inc.
+# SPDX-License-Identifier: Apache-2.0
+import os
+import json
+import re
+import logging
+from fosslight_util.get_pom_license import get_license_from_pom
+import fosslight_util.constant as constant
+logger = logging.getLogger(constant.LOGGER_NAME)
+def _split_spdx_expression(value: str) -> list[str]:
+    parts = re.split(r'\s+(?:OR|AND)\s+|[|]{2}|&&', value, flags=re.IGNORECASE)
+    tokens: list[str] = []
+    for part in parts:
+        token = part.strip().strip('()')
+        if token:
+            tokens.append(token)
+    unique: list[str] = []
+    for t in tokens:
+        if t not in unique:
+            unique.append(t)
+    return unique
+def get_licenses_from_package_json(file_path: str) -> list[str]:
+    try:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+    except Exception as ex:
+        logger.info(f"Failed to read package.json {file_path}: {ex}")
+        return []
+    if not isinstance(data, dict):
+        return []
+    licenses: list[str] = []
+    license_field = data.get('license')
+    if isinstance(license_field, str):
+        value = license_field.strip()
+        if value.upper().startswith('SEE LICENSE IN'):
+            return []
+        licenses.extend(_split_spdx_expression(value))
+    elif isinstance(license_field, dict):
+        type_val = license_field.get('type')
+        if isinstance(type_val, str):
+            type_val = type_val.strip()
+            if type_val:
+                licenses.append(type_val)
+    if not licenses:
+        legacy = data.get('licenses')
+        if isinstance(legacy, list):
+            for item in legacy:
+                if isinstance(item, str):
+                    token = item.strip()
+                    if token:
+                        licenses.append(token)
+                elif isinstance(item, dict):
+                    t = item.get('type')
+                    if isinstance(t, str):
+                        t = t.strip()
+                        if t:
+                            licenses.append(t)
+    unique: list[str] = []
+    for lic in licenses:
+        if lic not in unique:
+            unique.append(lic)
+    return unique
+def get_licenses_from_setup_cfg(file_path: str) -> list[str]:
+    try:
+        import configparser
+        parser = configparser.ConfigParser()
+        parser.read(file_path, encoding='utf-8')
+        if parser.has_section('metadata'):
+            license_value = parser.get('metadata', 'license', fallback='').strip()
+            if license_value:
+                return _split_spdx_expression(license_value)
+    except Exception as ex:
+        logger.info(f"Failed to parse setup.cfg with configparser for {file_path}: {ex}")
+    try:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            content = f.read()
+        meta_match = re.search(r'^\s*\[metadata\]\s*(.*?)(?=^\s*\[|\Z)', content, flags=re.MULTILINE | re.DOTALL)
+        if not meta_match:
+            return []
+        block = meta_match.group(1)
+        m = re.search(r'^\s*license\s*=\s*(.+)$', block, flags=re.MULTILINE)
+        if not m:
+            return []
+        val = m.group(1).strip()
+        if (len(val) >= 2) and ((val[0] == val[-1]) and val[0] in ('"', "'")):
+            val = val[1:-1].strip()
+        if not val:
+            return []
+        return _split_spdx_expression(val)
+    except Exception as ex:
+        logger.info(f"Failed to parse setup.cfg {file_path} via regex fallback: {ex}")
+        return []
+def get_licenses_from_setup_py(file_path: str) -> list[str]:
+    try:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            content = f.read()
+    except Exception as ex:
+        logger.info(f"Failed to read setup.py {file_path}: {ex}")
+        return []
+    match = re.search(r'license\s*=\s*([\'"]{1,3})(.+?)\1', content, flags=re.IGNORECASE | re.DOTALL)
+    if not match:
+        return []
+    value = match.group(2).strip()
+    if not value:
+        return []
+    return _split_spdx_expression(value)
+def get_licenses_from_podspec(file_path: str) -> list[str]:
+    try:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            content = f.read()
+    except Exception as ex:
+        logger.info(f"Failed to read podspec {file_path}: {ex}")
+        return []
+    m = re.search(r'\blicense\s*=\s*([\'"])(.+?)\1', content, flags=re.IGNORECASE)
+    if m:
+        value = m.group(2).strip()
+        if value:
+            return _split_spdx_expression(value)
+    m = re.search(r'\blicense\s*=\s*\{[^}]*?:type\s*=>\s*([\'"])(.+?)\1', content, flags=re.IGNORECASE | re.DOTALL)
+    if m:
+        value = m.group(2).strip()
+        if value:
+            return _split_spdx_expression(value)
+    m = re.search(r'\blicense\s*=\s*\{[^}]*?:type\s*=>\s*:(\w+)', content, flags=re.IGNORECASE | re.DOTALL)
+    if m:
+        value = m.group(1).strip()
+        if value:
+            return _split_spdx_expression(value)
+    m = re.search(r'\blicense\s*=\s*:(\w+)', content, flags=re.DOTALL | re.IGNORECASE)
+    if m:
+        value = m.group(1).strip()
+        if value:
+            return _split_spdx_expression(value)
+    return []
+def get_licenses_from_cargo_toml(file_path: str) -> list[str]:
+    try:
+        data = None
+        try:
+            import tomllib as toml_loader  # Python 3.11+
+            with open(file_path, 'rb') as f:
+                data = toml_loader.load(f)
+        except Exception:
+            try:
+                import tomli as toml_loader  # Backport
+                with open(file_path, 'rb') as f:
+                    data = toml_loader.load(f)
+            except Exception:
+                data = None
+        if isinstance(data, dict):
+            package_tbl = data.get('package') or {}
+            license_value = package_tbl.get('license')
+            if isinstance(license_value, str) and license_value.strip():
+                return _split_spdx_expression(license_value.strip())
+            if package_tbl.get('license-file'):
+                return []
+    except Exception as ex:
+        logger.info(f"Failed to parse Cargo.toml via toml parser for {file_path}: {ex}")
+    try:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            content = f.read()
+        pkg_match = re.search(r'^\s*\[package\]\s*(.*?)(?=^\s*\[|\Z)', content, flags=re.MULTILINE | re.DOTALL)
+        if not pkg_match:
+            return []
+        block = pkg_match.group(1)
+        m = re.search(r'^\s*license\s*=\s*(?P<q>"""|\'\'\'|"|\')(?P<val>.*?)(?P=q)', block, flags=re.MULTILINE | re.DOTALL)
+        if m:
+            val = m.group('val').strip()
+            if val:
+                return _split_spdx_expression(val)
+        m2 = re.search(r'^\s*license-file\s*=\s*(?:"""|\'\'\'|"|\')(.*?)(?:"""|\'\'\'|"|\')', block,
+                       flags=re.MULTILINE | re.DOTALL)
+        if m2:
+            return []
+    except Exception as ex:
+        logger.info(f"Failed to parse Cargo.toml {file_path}: {ex}")
+        return []
+    return []
+def get_manifest_licenses(file_path: str) -> list[str]:
+    if file_path.endswith('.pom'):
+        try:
+            pom_licenses = get_license_from_pom(group_id='', artifact_id='', version='', pom_path=file_path, check_parent=True)
+            if not pom_licenses:
+                return []
+            return [x.strip() for x in pom_licenses.split(', ') if x.strip()]
+        except Exception as ex:
+            logger.info(f"Failed to extract license from POM {file_path}: {ex}")
+            return []
+    elif os.path.basename(file_path).lower() == 'package.json':
+        try:
+            return get_licenses_from_package_json(file_path)
+        except Exception as ex:
+            logger.info(f"Failed to extract license from package.json {file_path}: {ex}")
+            return []
+    elif os.path.basename(file_path).lower() == 'setup.cfg':
+        try:
+            return get_licenses_from_setup_cfg(file_path)
+        except Exception as ex:
+            logger.info(f"Failed to extract license from setup.cfg {file_path}: {ex}")
+            return []
+    elif os.path.basename(file_path).lower() == 'setup.py':
+        try:
+            return get_licenses_from_setup_py(file_path)
+        except Exception as ex:
+            logger.info(f"Failed to extract license from setup.py {file_path}: {ex}")
+            return []
+    elif os.path.basename(file_path).lower().endswith('.podspec'):
+        try:
+            return get_licenses_from_podspec(file_path)
+        except Exception as ex:
+            logger.info(f"Failed to extract license from podspec {file_path}: {ex}")
+            return []
+    elif os.path.basename(file_path).lower() == 'cargo.toml':
+        try:
+            return get_licenses_from_cargo_toml(file_path)
+        except Exception as ex:
+            logger.info(f"Failed to extract license from Cargo.toml {file_path}: {ex}")
+            return []

fosslight_source/run_scanoss.py CHANGED Viewed

@@ -8,14 +8,10 @@ import importlib_metadata
 import warnings
 import logging
 import json
-from datetime import datetime
 import fosslight_util.constant as constant
-from fosslight_util.set_log import init_log
 from fosslight_util.output_format import check_output_formats_v2  # , write_output_file
 from ._parsing_scanoss_file import parsing_scan_result  # scanoss
 from ._parsing_scanoss_file import parsing_extra_info  # scanoss
-import shutil
-from pathlib import Path
 from scanoss.scanner import Scanner, ScanType
 import io
 import contextlib
@@ -32,7 +28,7 @@ def get_scanoss_extra_info(scanned_result: dict) -> list:
 def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list = [],
-                   called_by_cli: bool = False, write_json_file: bool = False, num_threads: int = -1,
+                   called_by_cli: bool = False, num_threads: int = -1,
                    path_to_exclude: list = [], excluded_files: set = None) -> list:
     """
     Run scanoss.py for the given path.
@@ -46,13 +42,8 @@ def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list =
     """
     success, msg, output_path, output_files, output_extensions, formats = check_output_formats_v2(output_file_name, format)
-    if not called_by_cli:
-        global logger
-        _start_time = datetime.now().strftime('%y%m%d_%H%M')
-        logger, _result_log = init_log(os.path.join(output_path, f"fosslight_log_src_{_start_time}.txt"),
-                                       True, logging.INFO, logging.DEBUG, _PKG_NAME, path_to_scan, path_to_exclude)
     scanoss_file_list = []
+    api_limit_exceed = False
     try:
         importlib_metadata.distribution("scanoss")
     except Exception as error:
@@ -60,12 +51,6 @@ def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list =
         logger.warning("Please install scanoss and dataclasses before run fosslight_source with scanoss option.")
         return scanoss_file_list
-    if output_path == "":  # if json output with _write_json_file not used, output_path won't be needed.
-        output_path = os.getcwd()
-    else:
-        output_path = os.path.abspath(output_path)
-        if not os.path.isdir(output_path):
-            Path(output_path).mkdir(parents=True, exist_ok=True)
     output_json_file = os.path.join(output_path, SCANOSS_OUTPUT_FILE)
     if os.path.exists(output_json_file):  # remove scanner_output.wfp file if exist
         os.remove(output_json_file)
@@ -84,7 +69,6 @@ def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list =
             scanner.scan_folder_with_options(scan_dir=path_to_scan)
         captured_output = output_buffer.getvalue()
         api_limit_exceed = "due to service limits being exceeded" in captured_output
-        logger.debug(f"{captured_output}")
         if os.path.isfile(output_json_file):
             with open(output_json_file, "r") as st_json:
@@ -103,13 +87,4 @@ def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list =
     logger.info(f"|---Number of files detected with SCANOSS: {(len(scanoss_file_list))}")
-    try:
-        if write_json_file:
-            shutil.move(SCANOSS_RESULT_FILE, output_path)
-        else:
-            os.remove(output_json_file)
-            os.remove(SCANOSS_RESULT_FILE)
-    except Exception as error:
-        logger.debug(f"Moving scanoss raw files failed.: {error}")
     return scanoss_file_list, api_limit_exceed

fosslight_source/run_spdx_extractor.py CHANGED Viewed

@@ -12,26 +12,15 @@ import mmap
 logger = logging.getLogger(constant.LOGGER_NAME)
-def get_spdx_downloads(path_to_scan: str, path_to_exclude: set = None) -> dict:
-    download_dict = {}
+def get_spdx_downloads(file_path: str) -> list[str]:
+    results = []
     find_word = re.compile(rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)", re.IGNORECASE)
-    abs_path_to_scan = os.path.abspath(path_to_scan)
-    for root, dirs, files in os.walk(path_to_scan):
-        for file in files:
-            file_path = os.path.join(root, file)
-            rel_path_file = os.path.relpath(file_path, abs_path_to_scan).replace('\\', '/')
-            if rel_path_file in path_to_exclude:
-                continue
-            try:
-                if os.path.getsize(file_path) > 0:
-                    with open(file_path, "r") as f:
-                        with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj:
-                            for word in find_word.findall(mmap_obj):
-                                if rel_path_file in download_dict:
-                                    download_dict[rel_path_file].append(word.decode('utf-8'))
-                                else:
-                                    download_dict[rel_path_file] = [word.decode('utf-8')]
-            except Exception as ex:
-                logger.warning(f"Failed to extract SPDX download location. {rel_path_file}, {ex}")
-    return download_dict
+    try:
+        if os.path.getsize(file_path) > 0:
+            with open(file_path, "r") as f:
+                with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj:
+                    for word in find_word.findall(mmap_obj):
+                        results.append(word.decode('utf-8'))
+    except Exception as ex:
+        logger.warning(f"Failed to extract SPDX download location. {file_path}, {ex}")
+    return results

{fosslight_source-2.2.1.dist-info → fosslight_source-2.2.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fosslight_source
-Version: 2.2.1
+Version: 2.2.3
 Summary: FOSSLight Source Scanner
 Home-page: https://github.com/fosslight/fosslight_source_scanner
 Download-URL: https://github.com/fosslight/fosslight_source_scanner
@@ -17,7 +17,7 @@ License-File: LICENSE
 Requires-Dist: pyparsing
 Requires-Dist: scanoss>=1.18.0
 Requires-Dist: XlsxWriter
-Requires-Dist: fosslight_util>=2.1.34
+Requires-Dist: fosslight_util>=2.1.37
 Requires-Dist: PyYAML
 Requires-Dist: wheel>=0.38.1
 Requires-Dist: intbitset

fosslight_source-2.2.3.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,17 @@
+fosslight_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+fosslight_source/_help.py,sha256=Ge6g9GKWGza11E74PFnBMqsj40UCUz-a_xArDZ1FClU,2316
+fosslight_source/_license_matched.py,sha256=-3H881XQjFDafRttBsuboS3VbCPYEvPH1pwWXptknE4,2164
+fosslight_source/_parsing_scancode_file_item.py,sha256=9TjCwTTPaytqTlZzCKzaX-n20xJLy346xwp0Ee-rWWA,14951
+fosslight_source/_parsing_scanoss_file.py,sha256=L3iHqmQF2jeSpHYuYSre44doXKy-BoX0u1Lm2IfJSU8,3866
+fosslight_source/_scan_item.py,sha256=rWoC-jMc6Hf_dpiwVdQjATNhkzRgLVn966q3UA1TAxc,9412
+fosslight_source/cli.py,sha256=GDwBlwgSvPtWlKhk6hZWQSUl71MJYaGIE4_Mcp1w_L8,20367
+fosslight_source/run_manifest_extractor.py,sha256=8itu69eoriEnSskVDF9rnkLZ3d515KTaKy6tkZKWgBk,8940
+fosslight_source/run_scancode.py,sha256=TFyNLV6P9rSBo9royDoG6az4l7Tkpl8Gr66IFK1DBU8,9021
+fosslight_source/run_scanoss.py,sha256=91hQ60BcjgCkfrcWuMYoR4lRSCa1-AdLMxVubTx_Wv4,3763
+fosslight_source/run_spdx_extractor.py,sha256=wIdjDIpzglc2tfrh_YodkAQ0QqfgfmJBSuE2bf4w4Tg,862
+fosslight_source-2.2.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+fosslight_source-2.2.3.dist-info/METADATA,sha256=Md1A-4vaFc-sabOYYLntHZFo6NoohWA-Sn_H7K4LWus,3557
+fosslight_source-2.2.3.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
+fosslight_source-2.2.3.dist-info/entry_points.txt,sha256=G4bBRWqSrJ68g-2M-JtNDrSZsdym_M7_KohQ2qR1vG8,113
+fosslight_source-2.2.3.dist-info/top_level.txt,sha256=C2vw-0OIent84Vq-UEk1gt_kK1EL8dIItzBzp3WNyA4,17
+fosslight_source-2.2.3.dist-info/RECORD,,

fosslight_source-2.2.1.dist-info/RECORD DELETED Viewed

@@ -1,16 +0,0 @@
-fosslight_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-fosslight_source/_help.py,sha256=Ge6g9GKWGza11E74PFnBMqsj40UCUz-a_xArDZ1FClU,2316
-fosslight_source/_license_matched.py,sha256=-3H881XQjFDafRttBsuboS3VbCPYEvPH1pwWXptknE4,2164
-fosslight_source/_parsing_scancode_file_item.py,sha256=-shPakF0oQWDzxWFylE2dQ93O4tgCudYM2zvX4K5glQ,19386
-fosslight_source/_parsing_scanoss_file.py,sha256=L3iHqmQF2jeSpHYuYSre44doXKy-BoX0u1Lm2IfJSU8,3866
-fosslight_source/_scan_item.py,sha256=NMmYaqdpNM-yeJxXPVPmoPo_thOnaAGRXYDEcpD6s2Y,9415
-fosslight_source/cli.py,sha256=qbp87Rhe5c2hIcF1-5TR6btPeOCe32Ffq1pxJM9ADcY,17303
-fosslight_source/run_scancode.py,sha256=TFyNLV6P9rSBo9royDoG6az4l7Tkpl8Gr66IFK1DBU8,9021
-fosslight_source/run_scanoss.py,sha256=_gdA4kOByI4saT4bDvMwIabpxtpH4f_yruHdBtb_g-o,4852
-fosslight_source/run_spdx_extractor.py,sha256=LLyYKpkpD5Qxkv_qTNBrFtKvrmnXw9SFYZCkpishb_c,1537
-fosslight_source-2.2.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-fosslight_source-2.2.1.dist-info/METADATA,sha256=dXxCjJqwvuDy1Yz4KTltcIeO6etLzTx0AbwF_x0aGkU,3557
-fosslight_source-2.2.1.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
-fosslight_source-2.2.1.dist-info/entry_points.txt,sha256=G4bBRWqSrJ68g-2M-JtNDrSZsdym_M7_KohQ2qR1vG8,113
-fosslight_source-2.2.1.dist-info/top_level.txt,sha256=C2vw-0OIent84Vq-UEk1gt_kK1EL8dIItzBzp3WNyA4,17
-fosslight_source-2.2.1.dist-info/RECORD,,

{fosslight_source-2.2.1.dist-info → fosslight_source-2.2.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{fosslight_source-2.2.1.dist-info → fosslight_source-2.2.3.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{fosslight_source-2.2.1.dist-info → fosslight_source-2.2.3.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{fosslight_source-2.2.1.dist-info → fosslight_source-2.2.3.dist-info}/top_level.txt RENAMED Viewed

File without changes

fosslight-source 2.2.1__py3-none-any.whl → 2.2.3__py3-none-any.whl

fosslight-source 2.2.1-py3-none-any.whl → 2.2.3-py3-none-any.whl