PyPI - fosslight-source - Versions diffs - 2.2.0__tar.gz → 2.2.1__tar.gz - Mend

fosslight-source 2.2.0 (tar.gz) → 2.2.1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

{fosslight_source-2.2.0/src/fosslight_source.egg-info → fosslight_source-2.2.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fosslight_source
-Version: 2.2.0
+Version: 2.2.1
 Summary: FOSSLight Source Scanner
 Home-page: https://github.com/fosslight/fosslight_source_scanner
 Download-URL: https://github.com/fosslight/fosslight_source_scanner
@@ -17,7 +17,7 @@ License-File: LICENSE
 Requires-Dist: pyparsing
 Requires-Dist: scanoss>=1.18.0
 Requires-Dist: XlsxWriter
-Requires-Dist: fosslight_util>=2.1.31
+Requires-Dist: fosslight_util>=2.1.34
 Requires-Dist: PyYAML
 Requires-Dist: wheel>=0.38.1
 Requires-Dist: intbitset

{fosslight_source-2.2.0 → fosslight_source-2.2.1}/requirements.txt RENAMED Viewed

@@ -1,7 +1,7 @@
 pyparsing
 scanoss>=1.18.0
 XlsxWriter
-fosslight_util>=2.1.31
+fosslight_util>=2.1.34
 PyYAML
 wheel>=0.38.1
 intbitset

{fosslight_source-2.2.0 → fosslight_source-2.2.1}/setup.py RENAMED Viewed

@@ -14,7 +14,7 @@ with open('requirements.txt', 'r', 'utf-8') as f:
 if __name__ == "__main__":
     setup(
         name='fosslight_source',
-        version='2.2.0',
+        version='2.2.1',
         package_dir={"": "src"},
         packages=find_packages(where='src'),
         description='FOSSLight Source Scanner',

{fosslight_source-2.2.0 → fosslight_source-2.2.1}/src/fosslight_source/_parsing_scancode_file_item.py RENAMED Viewed

@@ -10,12 +10,9 @@ import fosslight_util.constant as constant
 from fosslight_util.get_pom_license import get_license_from_pom
 from ._license_matched import MatchedLicense
 from ._scan_item import SourceItem
-from ._scan_item import is_exclude_dir
-from ._scan_item import is_exclude_file
 from ._scan_item import replace_word
 from ._scan_item import is_notice_file
 from ._scan_item import is_manifest_file
-from ._scan_item import is_package_dir
 from typing import Tuple
 logger = logging.getLogger(constant.LOGGER_NAME)
@@ -83,8 +80,6 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
     msg = []
     scancode_file_item = []
     license_list = {}  # Key :[license]+[matched_text], value: MatchedLicense()
-    prev_dir = ""
-    prev_dir_value = False
     if scancode_file_list:
         for file in scancode_file_list:
@@ -96,22 +91,11 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
                 is_binary = file.get("is_binary", False)
                 if "type" in file:
                     is_dir = file["type"] == "directory"
-                    if is_dir:
-                        prev_dir_value = is_exclude_dir(file_path)
-                        prev_dir = file_path
                 if not is_binary and not is_dir:
                     licenses = file.get("licenses", [])
                     copyright_list = file.get("copyrights", [])
                     result_item = SourceItem(file_path)
-                    is_pkg, pkg_path = is_package_dir(os.path.dirname(file_path))
-                    if is_pkg:
-                        result_item.source_name_or_path = pkg_path
-                        if not any(x.source_name_or_path == result_item.source_name_or_path for x in scancode_file_item):
-                            result_item.exclude = True
-                            scancode_file_item.append(result_item)
-                        continue
                     if has_error and "scan_errors" in file:
                         error_msg = file.get("scan_errors", [])
@@ -238,8 +222,6 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
                                 set(license_expression_list))
                             result_item.comment = ','.join(license_expression_list)
-                        if is_exclude_file(file_path, prev_dir, prev_dir_value):
-                            result_item.exclude = True
                         scancode_file_item.append(result_item)
             except Exception as ex:
                 msg.append(f"Error Parsing item: {ex}")
@@ -271,17 +253,9 @@ def parsing_scancode_32_later(
                 is_binary = file.get("is_binary", False)
                 is_dir = file.get("type", "") == "directory"
                 if (not file_path) or is_binary or is_dir:
+                    logger.info(f"Skipping {file_path} because it is binary or directory")
                     continue
                 result_item = SourceItem(file_path)
-                is_pkg, pkg_path = is_package_dir(os.path.dirname(file_path))
-                if is_pkg:
-                    result_item.source_name_or_path = pkg_path
-                    if not any(x.source_name_or_path == result_item.source_name_or_path for x in scancode_file_item):
-                        result_item.exclude = True
-                        scancode_file_item.append(result_item)
-                    continue
                 if has_error:
                     error_msg = file.get("scan_errors", [])
                     if error_msg:
@@ -334,8 +308,6 @@ def parsing_scancode_32_later(
                                             license_list[lic_matched_key] = lic_info
                                     license_detected.append(found_lic)
                 result_item.licenses = license_detected
-                result_item.exclude = is_exclude_file(file_path)
                 file_ext = os.path.splitext(file_path)[1].lower()
                 is_source_file = file_ext and file_ext in SOURCE_EXTENSIONS
                 result_item.is_license_text = is_notice_file(file_path) or (

{fosslight_source-2.2.0 → fosslight_source-2.2.1}/src/fosslight_source/_parsing_scanoss_file.py RENAMED Viewed

@@ -3,12 +3,9 @@
 # Copyright (c) 2020 LG Electronics Inc.
 # SPDX-License-Identifier: Apache-2.0
-import os
 import logging
 import fosslight_util.constant as constant
 from ._scan_item import SourceItem
-from ._scan_item import is_exclude_file
-from ._scan_item import is_package_dir
 from ._scan_item import replace_word
 from typing import Tuple
@@ -18,7 +15,7 @@ SCANOSS_INFO_HEADER = ['No', 'Source Path', 'Component Declared', 'SPDX Tag',
                        'Matched Rate (line number)', 'scanoss_fileURL']
-def parsing_extraInfo(scanned_result: dict) -> list:
+def parsing_extra_info(scanned_result: dict) -> list:
     scanoss_extra_info = []
     for scan_item in scanned_result:
         license_w_source = scan_item.scanoss_reference
@@ -37,22 +34,14 @@ def parsing_extraInfo(scanned_result: dict) -> list:
     return scanoss_extra_info
-def parsing_scanResult(scanoss_report: dict, path_to_scan: str = "", path_to_exclude: list = []) -> Tuple[bool, list]:
+def parsing_scan_result(scanoss_report: dict, excluded_files: set = None) -> Tuple[bool, list]:
     scanoss_file_item = []
-    abs_path_to_exclude = [os.path.abspath(os.path.join(path_to_scan, path)) for path in path_to_exclude]
     for file_path, findings in scanoss_report.items():
-        abs_file_path = os.path.abspath(os.path.join(path_to_scan, file_path))
-        if any(os.path.commonpath([abs_file_path, exclude_path]) == exclude_path for exclude_path in abs_path_to_exclude):
+        file_path_normalized = file_path.replace('\\', '/')
+        if file_path_normalized in excluded_files:
             continue
         result_item = SourceItem(file_path)
-        is_pkg, pkg_path = is_package_dir(os.path.dirname(file_path))
-        if is_pkg:
-            result_item.source_name_or_path = pkg_path
-            if not any(x.source_name_or_path == result_item.source_name_or_path for x in scanoss_file_item):
-                result_item.exclude = True
-                scanoss_file_item.append(result_item)
-            continue
         if 'id' in findings[0]:
             if "none" == findings[0]['id']:
@@ -86,9 +75,6 @@ def parsing_scanResult(scanoss_report: dict, path_to_scan: str = "", path_to_exc
                 result_item.licenses = license_detected
                 result_item.scanoss_reference = license_w_source
-        if is_exclude_file(file_path):
-            result_item.exclude = True
         if 'file_url' in findings[0]:
             result_item.fileURL = findings[0]['file_url']
         if 'matched' in findings[0]:

{fosslight_source-2.2.0 → fosslight_source-2.2.1}/src/fosslight_source/_scan_item.py RENAMED Viewed

@@ -19,14 +19,6 @@ _notice_filename = ['licen[cs]e[s]?', 'notice[s]?', 'legal', 'copyright[s]?', 'c
                     '[a,l]?gpl[-]?[1-3]?[.,-,_]?[0-1]?', 'mit', 'bsd[-]?[0-4]?', 'bsd[-]?[0-4][-]?clause[s]?',
                     'apache[-,_]?[1-2]?[.,-,_]?[0-2]?']
 _manifest_filename = [r'.*\.pom$', r'package\.json$', r'setup\.py$', r'pubspec\.yaml$', r'.*\.podspec$', r'Cargo\.toml$']
-_exclude_filename = ["changelog", "config.guess", "config.sub", "changes", "ltmain.sh",
-                     "configure", "configure.ac", "depcomp", "compile", "missing", "makefile"]
-_exclude_extension = [".m4", ".in", ".po"]
-_exclude_directory = ["test", "tests", "doc", "docs"]
-_exclude_directory = [os.path.sep + dir_name +
-                      os.path.sep for dir_name in _exclude_directory]
-_exclude_directory.append("/.")
-_package_directory = ["node_modules", "venv", "Pods", "Carthage"]
 MAX_LICENSE_LENGTH = 200
 MAX_LICENSE_TOTAL_LENGTH = 600
 SUBSTRING_LICENSE_COMMENT = "Maximum character limit (License)"
@@ -209,39 +201,6 @@ class SourceItem(FileItem):
             return self.source_name_or_path == other.source_name_or_path
-def is_exclude_dir(dir_path: str) -> bool:
-    if dir_path:
-        dir_path = dir_path.lower()
-        dir_path = dir_path if dir_path.endswith(
-            os.path.sep) else dir_path + os.path.sep
-        dir_path = dir_path if dir_path.startswith(
-            os.path.sep) else os.path.sep + dir_path
-        return any(dir_name in dir_path for dir_name in _exclude_directory)
-    return False
-def is_exclude_file(file_path: str, prev_dir: str = None, prev_dir_exclude_value: bool = None) -> bool:
-    file_path = file_path.lower()
-    filename = os.path.basename(file_path)
-    if os.path.splitext(filename)[1] in _exclude_extension:
-        return True
-    if filename.startswith('.') or filename in _exclude_filename:
-        return True
-    dir_path = os.path.dirname(file_path)
-    if prev_dir is not None:  # running ScanCode
-        if dir_path == prev_dir:
-            return prev_dir_exclude_value
-        else:
-            # There will be no execution of this else statement.
-            # Because scancode json output results are sorted by path,
-            # most of them will match the previous if statement.
-            return is_exclude_dir(dir_path)
-    else:  # running SCANOSS
-        return is_exclude_dir(dir_path)
-    return False
 def is_notice_file(file_path: str) -> bool:
     pattern = r"({})(?<!w)".format("|".join(_notice_filename))
     filename = os.path.basename(file_path)
@@ -252,42 +211,3 @@ def is_manifest_file(file_path: str) -> bool:
     pattern = r"({})$".format("|".join(_manifest_filename))
     filename = os.path.basename(file_path)
     return bool(re.match(pattern, filename, re.IGNORECASE))
-def is_package_dir(dir_path: str) -> bool:
-    # scancode and scanoss use '/' as path separator regardless of OS
-    dir_path = dir_path.replace('\\', '/')
-    path_parts = dir_path.split('/')
-    for pkg_dir in _package_directory:
-        if pkg_dir in path_parts:
-            pkg_index = path_parts.index(pkg_dir)
-            pkg_path = '/'.join(path_parts[:pkg_index + 1])
-            return True, pkg_path
-    return False, ""
-def _has_parent_in_exclude_list(rel_path: str, path_to_exclude: list) -> bool:
-    path_parts = rel_path.replace('\\', '/').split('/')
-    for i in range(1, len(path_parts)):
-        parent_path = '/'.join(path_parts[:i])
-        if parent_path in path_to_exclude:
-            return True
-    return False
-def get_excluded_paths(path_to_scan: str, custom_excluded_paths: list = []) -> list:
-    path_to_exclude = custom_excluded_paths.copy()
-    abs_path_to_scan = os.path.abspath(path_to_scan)
-    for root, dirs, files in os.walk(path_to_scan):
-        for dir_name in dirs:
-            dir_path = os.path.join(root, dir_name)
-            rel_path = os.path.relpath(dir_path, abs_path_to_scan)
-            if not _has_parent_in_exclude_list(rel_path, path_to_exclude):
-                if dir_name in _package_directory:
-                    path_to_exclude.append(rel_path)
-                elif is_exclude_dir(rel_path):
-                    path_to_exclude.append(rel_path)
-    return path_to_exclude

{fosslight_source-2.2.0 → fosslight_source-2.2.1}/src/fosslight_source/cli.py RENAMED Viewed

@@ -14,13 +14,12 @@ from datetime import datetime
 import fosslight_util.constant as constant
 from fosslight_util.set_log import init_log
 from fosslight_util.timer_thread import TimerThread
-from fosslight_util.exclude import excluding_files
 from ._help import print_version, print_help_msg_source_scanner
 from ._license_matched import get_license_list_to_print
 from fosslight_util.output_format import check_output_formats_v2, write_output_file
 from fosslight_util.correct import correct_with_yaml
 from .run_scancode import run_scan
-from ._scan_item import get_excluded_paths
+from fosslight_util.exclude import get_excluded_paths
 from .run_scanoss import run_scanoss_py
 from .run_scanoss import get_scanoss_extra_info
 import yaml
@@ -38,6 +37,9 @@ MERGED_HEADER = {SRC_SHEET_NAME: ['ID', 'Source Path', 'OSS Name',
                                   'OSS Version', 'License', 'Download Location',
                                   'Homepage', 'Copyright Text', 'Exclude', 'Comment', 'license_reference']}
 SCANNER_TYPE = ['kb', 'scancode', 'scanoss', 'all']
+EXCLUDE_FILENAME = ["changelog", "config.guess", "config.sub", "changes", "ltmain.sh",
+                    "configure", "configure.ac", "depcomp", "compile", "missing", "Makefile"]
+EXCLUDE_FILE_EXTENSION = [".m4", ".in", ".po"]
 logger = logging.getLogger(constant.LOGGER_NAME)
 warnings.filterwarnings("ignore", category=FutureWarning)
@@ -126,23 +128,6 @@ def main() -> None:
         sys.exit(1)
-def count_files(path_to_scan: str, path_to_exclude: list) -> Tuple[int, int]:
-    total_files = 0
-    excluded_files = 0
-    abs_path_to_exclude = [os.path.abspath(os.path.join(path_to_scan, path)) for path in path_to_exclude]
-    for root, _, files in os.walk(path_to_scan):
-        for file in files:
-            file_path = os.path.join(root, file)
-            abs_file_path = os.path.abspath(file_path)
-            if any(os.path.commonpath([abs_file_path, exclude_path]) == exclude_path
-                   for exclude_path in abs_path_to_exclude):
-                excluded_files += 1
-            total_files += 1
-    return total_files, excluded_files
 def create_report_file(
     _start_time: str, merged_result: list,
     license_list: list, scanoss_result: list,
@@ -150,7 +135,7 @@ def create_report_file(
     output_path: str = "", output_files: list = [],
     output_extensions: list = [], correct_mode: bool = True,
     correct_filepath: str = "", path_to_scan: str = "", path_to_exclude: list = [],
-    formats: list = [], excluded_file_list: list = [], api_limit_exceed: bool = False
+    formats: list = [], api_limit_exceed: bool = False, files_count: int = 0
 ) -> 'ScannerItem':
     """
     Create report files for given scanned result.
@@ -209,7 +194,6 @@ def create_report_file(
     scan_item = ScannerItem(PKG_NAME, _start_time)
     scan_item.set_cover_pathinfo(path_to_scan, path_to_exclude)
-    files_count, _ = count_files(path_to_scan, path_to_exclude)
     scan_item.set_cover_comment(f"Scanned files: {files_count}")
     if api_limit_exceed:
@@ -223,12 +207,6 @@ def create_report_file(
     if merged_result:
         sheet_list = {}
-        # Remove results that are in excluding file list
-        for i in range(len(merged_result) - 1, -1, -1):  # Iterate from last to first
-            item_path = merged_result[i].source_name_or_path  # Assuming SourceItem has 'file_path' attribute
-            if item_path in excluded_file_list:
-                del merged_result[i]  # Delete matching item
         scan_item.append_file_items(merged_result, PKG_NAME)
         if selected_scanner == 'scanoss':
@@ -365,15 +343,17 @@ def run_scanners(
     logger, result_log = init_log(os.path.join(output_path, f"fosslight_log_src_{start_time}.txt"),
                                   True, logging.INFO, logging.DEBUG, PKG_NAME, path_to_scan, path_to_exclude)
-    excluded_file_list = excluding_files(path_to_exclude, path_to_scan)
     if '.xlsx' not in output_extensions and print_matched_text:
         logger.warning("-m option is only available for excel.")
         print_matched_text = False
     if success:
-        excluded_path_with_default_exclusion = get_excluded_paths(path_to_scan, path_to_exclude)
+        path_to_exclude_with_filename = path_to_exclude + EXCLUDE_FILENAME
+        excluded_path_with_default_exclusion, excluded_path_without_dot, excluded_files, cnt_file_except_skipped = (
+            get_excluded_paths(path_to_scan, path_to_exclude_with_filename, EXCLUDE_FILE_EXTENSION))
         logger.debug(f"Skipped paths: {excluded_path_with_default_exclusion}")
         if not selected_scanner:
             selected_scanner = 'all'
         if selected_scanner in ['scancode', 'all', 'kb']:
@@ -381,18 +361,20 @@ def run_scanners(
                                                                                       write_json_file, num_cores, True,
                                                                                       print_matched_text, formats, called_by_cli,
                                                                                       time_out, correct_mode, correct_filepath,
-                                                                                      excluded_path_with_default_exclusion)
+                                                                                      excluded_path_with_default_exclusion,
+                                                                                      excluded_files)
+        excluded_files = set(excluded_files) if excluded_files else set()
         if selected_scanner in ['scanoss', 'all']:
             scanoss_result, api_limit_exceed = run_scanoss_py(path_to_scan, output_file_name, formats, True, write_json_file,
-                                                              num_cores, excluded_path_with_default_exclusion)
+                                                              num_cores, excluded_path_with_default_exclusion, excluded_files)
         if selected_scanner in SCANNER_TYPE:
             run_kb = True if selected_scanner in ['kb', 'all'] else False
-            spdx_downloads = get_spdx_downloads(path_to_scan, excluded_path_with_default_exclusion)
+            spdx_downloads = get_spdx_downloads(path_to_scan, excluded_files)
             merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads, path_to_scan, run_kb)
             scan_item = create_report_file(start_time, merged_result, license_list, scanoss_result, selected_scanner,
                                            print_matched_text, output_path, output_files, output_extensions, correct_mode,
-                                           correct_filepath, path_to_scan, path_to_exclude, formats, excluded_file_list,
-                                           api_limit_exceed)
+                                           correct_filepath, path_to_scan, excluded_path_without_dot, formats,
+                                           api_limit_exceed, cnt_file_except_skipped)
         else:
             print_help_msg_source_scanner()
             result_log[RESULT_KEY] = "Unsupported scanner"

{fosslight_source-2.2.0 → fosslight_source-2.2.1}/src/fosslight_source/run_scancode.py RENAMED Viewed

@@ -29,7 +29,8 @@ def run_scan(
     return_results: bool = False, need_license: bool = False,
     formats: list = [], called_by_cli: bool = False,
     time_out: int = 120, correct_mode: bool = True,
-    correct_filepath: str = "", path_to_exclude: list = []
+    correct_filepath: str = "", path_to_exclude: list = [],
+    excluded_files: list = []
 ) -> Tuple[bool, str, list, list]:
     if not called_by_cli:
         global logger
@@ -90,39 +91,40 @@ def run_scan(
                         exclude_path_normalized = os.path.normpath(exclude_path).replace("\\", "/")
                         if exclude_path_normalized.endswith("/**"):
-                            exclude_path_normalized = exclude_path_normalized[:-3]
-                        elif exclude_path_normalized.endswith("**"):
-                            exclude_path_normalized = exclude_path_normalized.rstrip("*")
-                        if exclude_path_normalized.startswith("**/"):
-                            exclude_path_normalized = exclude_path_normalized[3:]
-                        full_exclude_path = os.path.join(abs_path_to_scan, exclude_path)
-                        is_dir = os.path.isdir(full_exclude_path)
-                        is_file = os.path.isfile(full_exclude_path)
-                        if is_dir:
-                            dir_name = os.path.basename(exclude_path_normalized.rstrip("/"))
-                            base_path = exclude_path_normalized.rstrip("/")
-                            if dir_name:
-                                total_files_to_excluded.append(dir_name)
-                                max_depth = 0
-                                for root, dirs, files in os.walk(full_exclude_path):
-                                    depth = root[len(full_exclude_path):].count(os.sep)
-                                    max_depth = max(max_depth, depth)
-                                for depth in range(1, max_depth + 2):
-                                    pattern = base_path + "/*" * depth
-                                    total_files_to_excluded.append(pattern)
+                            base_dir = exclude_path_normalized[:-3].rstrip("/")
+                            if base_dir:
+                                full_exclude_path = os.path.join(abs_path_to_scan, base_dir)
+                                if os.path.isdir(full_exclude_path):
+                                    total_files_to_excluded.append(base_dir)
+                                    total_files_to_excluded.append(exclude_path_normalized)
+                                else:
+                                    total_files_to_excluded.append(exclude_path_normalized)
                             else:
                                 total_files_to_excluded.append(exclude_path_normalized)
-                        elif is_file:
-                            total_files_to_excluded.append(exclude_path_normalized)
                         else:
-                            if "/" in exclude_path_normalized:
-                                dir_name = os.path.basename(exclude_path_normalized.rstrip("/"))
-                                if dir_name:
-                                    total_files_to_excluded.append(dir_name)
-                            total_files_to_excluded.append(exclude_path_normalized)
+                            has_glob_chars = any(char in exclude_path_normalized for char in ['*', '?', '['])
+                            if not has_glob_chars:
+                                full_exclude_path = os.path.join(abs_path_to_scan, exclude_path_normalized)
+                                is_dir = os.path.isdir(full_exclude_path)
+                                is_file = os.path.isfile(full_exclude_path)
+                            else:
+                                is_dir = False
+                                is_file = False
+                            if is_dir:
+                                base_path = exclude_path_normalized.rstrip("/")
+                                if base_path:
+                                    total_files_to_excluded.append(base_path)
+                                    total_files_to_excluded.append(f"{base_path}/**")
+                                else:
+                                    total_files_to_excluded.append(exclude_path_normalized)
+                            elif is_file:
+                                total_files_to_excluded.append(f"**/{exclude_path_normalized}")
+                            else:
+                                total_files_to_excluded.append(exclude_path_normalized)
+                if excluded_files:
+                    total_files_to_excluded.extend(f"**/{file_path}" for file_path in excluded_files)
                 total_files_to_excluded = sorted(list(set(total_files_to_excluded)))
                 ignore_tuple = tuple(total_files_to_excluded)

{fosslight_source-2.2.0 → fosslight_source-2.2.1}/src/fosslight_source/run_scanoss.py RENAMED Viewed

@@ -12,8 +12,8 @@ from datetime import datetime
 import fosslight_util.constant as constant
 from fosslight_util.set_log import init_log
 from fosslight_util.output_format import check_output_formats_v2  # , write_output_file
-from ._parsing_scanoss_file import parsing_scanResult  # scanoss
-from ._parsing_scanoss_file import parsing_extraInfo  # scanoss
+from ._parsing_scanoss_file import parsing_scan_result  # scanoss
+from ._parsing_scanoss_file import parsing_extra_info  # scanoss
 import shutil
 from pathlib import Path
 from scanoss.scanner import Scanner, ScanType
@@ -28,11 +28,12 @@ SCANOSS_OUTPUT_FILE = "scanoss_raw_result.json"
 def get_scanoss_extra_info(scanned_result: dict) -> list:
-    return parsing_extraInfo(scanned_result)
+    return parsing_extra_info(scanned_result)
-def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list = [], called_by_cli: bool = False,
-                   write_json_file: bool = False, num_threads: int = -1, path_to_exclude: list = []) -> list:
+def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list = [],
+                   called_by_cli: bool = False, write_json_file: bool = False, num_threads: int = -1,
+                   path_to_exclude: list = [], excluded_files: set = None) -> list:
     """
     Run scanoss.py for the given path.
@@ -72,7 +73,7 @@ def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list =
     try:
         scanner = Scanner(
             ignore_cert_errors=True,
-            skip_folders=path_to_exclude,
+            skip_folders=list(path_to_exclude) if path_to_exclude else [],
             scan_output=output_json_file,
             scan_options=ScanType.SCAN_SNIPPETS.value,
             nb_threads=num_threads if num_threads > 0 else 10
@@ -86,30 +87,16 @@ def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list =
         logger.debug(f"{captured_output}")
         if os.path.isfile(output_json_file):
-            total_files_to_excluded = []
-            if path_to_exclude:
-                for path in path_to_exclude:
-                    path = os.path.join(path_to_scan, os.path.relpath(path, os.path.abspath(path_to_scan))) \
-                           if not os.path.isabs(path_to_scan) and os.path.isabs(path) else os.path.join(path_to_scan, path)
-                    if os.path.isdir(path):
-                        for root, _, files in os.walk(path):
-                            root = root[len(path_to_scan) + 1:]
-                            total_files_to_excluded.extend([os.path.normpath(os.path.join(root, file)).replace('\\', '/')
-                                                            for file in files])
-                    elif os.path.isfile(path):
-                        path = path[len(path_to_scan) + 1:]
-                        total_files_to_excluded.append(os.path.normpath(path).replace('\\', '/'))
             with open(output_json_file, "r") as st_json:
                 st_python = json.load(st_json)
-                for key_to_exclude in total_files_to_excluded:
+                for key_to_exclude in excluded_files:
                     if key_to_exclude in st_python:
                         del st_python[key_to_exclude]
             with open(output_json_file, 'w') as st_json:
                 json.dump(st_python, st_json, indent=4)
             with open(output_json_file, "r") as st_json:
                 st_python = json.load(st_json)
-                scanoss_file_list = parsing_scanResult(st_python, path_to_scan, path_to_exclude)
+                scanoss_file_list = parsing_scan_result(st_python, excluded_files)
     except Exception as error:
         logger.debug(f"SCANOSS Parsing {path_to_scan}: {error}")

fosslight_source-2.2.1/src/fosslight_source/run_spdx_extractor.py ADDED Viewed

@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2023 LG Electronics Inc.
+# SPDX-License-Identifier: Apache-2.0
+import os
+import logging
+import re
+import fosslight_util.constant as constant
+import mmap
+logger = logging.getLogger(constant.LOGGER_NAME)
+def get_spdx_downloads(path_to_scan: str, path_to_exclude: set = None) -> dict:
+    download_dict = {}
+    find_word = re.compile(rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)", re.IGNORECASE)
+    abs_path_to_scan = os.path.abspath(path_to_scan)
+    for root, dirs, files in os.walk(path_to_scan):
+        for file in files:
+            file_path = os.path.join(root, file)
+            rel_path_file = os.path.relpath(file_path, abs_path_to_scan).replace('\\', '/')
+            if rel_path_file in path_to_exclude:
+                continue
+            try:
+                if os.path.getsize(file_path) > 0:
+                    with open(file_path, "r") as f:
+                        with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj:
+                            for word in find_word.findall(mmap_obj):
+                                if rel_path_file in download_dict:
+                                    download_dict[rel_path_file].append(word.decode('utf-8'))
+                                else:
+                                    download_dict[rel_path_file] = [word.decode('utf-8')]
+            except Exception as ex:
+                logger.warning(f"Failed to extract SPDX download location. {rel_path_file}, {ex}")
+    return download_dict

{fosslight_source-2.2.0 → fosslight_source-2.2.1/src/fosslight_source.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fosslight_source
-Version: 2.2.0
+Version: 2.2.1
 Summary: FOSSLight Source Scanner
 Home-page: https://github.com/fosslight/fosslight_source_scanner
 Download-URL: https://github.com/fosslight/fosslight_source_scanner
@@ -17,7 +17,7 @@ License-File: LICENSE
 Requires-Dist: pyparsing
 Requires-Dist: scanoss>=1.18.0
 Requires-Dist: XlsxWriter
-Requires-Dist: fosslight_util>=2.1.31
+Requires-Dist: fosslight_util>=2.1.34
 Requires-Dist: PyYAML
 Requires-Dist: wheel>=0.38.1
 Requires-Dist: intbitset

{fosslight_source-2.2.0 → fosslight_source-2.2.1}/src/fosslight_source.egg-info/requires.txt RENAMED Viewed

@@ -1,7 +1,7 @@
 pyparsing
 scanoss>=1.18.0
 XlsxWriter
-fosslight_util>=2.1.31
+fosslight_util>=2.1.34
 PyYAML
 wheel>=0.38.1
 intbitset

fosslight_source-2.2.0/src/fosslight_source/run_spdx_extractor.py DELETED Viewed

@@ -1,50 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# Copyright (c) 2023 LG Electronics Inc.
-# SPDX-License-Identifier: Apache-2.0
-import os
-import logging
-import re
-import fosslight_util.constant as constant
-import mmap
-logger = logging.getLogger(constant.LOGGER_NAME)
-def get_file_list(path_to_scan: str, path_to_exclude: list = []) -> list:
-    file_list = []
-    abs_path_to_exclude = [os.path.abspath(os.path.join(path_to_scan, path)) for path in path_to_exclude]
-    for root, dirs, files in os.walk(path_to_scan):
-        for file in files:
-            file_path = os.path.join(root, file)
-            abs_file_path = os.path.abspath(file_path)
-            if any(os.path.commonpath([abs_file_path, exclude_path]) == exclude_path
-                    for exclude_path in abs_path_to_exclude):
-                continue
-            file_list.append(file_path)
-    return file_list
-def get_spdx_downloads(path_to_scan: str, path_to_exclude: list = []) -> dict:
-    download_dict = {}
-    find_word = re.compile(rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)", re.IGNORECASE)
-    file_list = get_file_list(path_to_scan, path_to_exclude)
-    for file in file_list:
-        try:
-            rel_path_file = os.path.relpath(file, path_to_scan)
-            # remove the path_to_scan from the file paths
-            if os.path.getsize(file) > 0:
-                with open(file, "r") as f:
-                    with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj:
-                        for word in find_word.findall(mmap_obj):
-                            if rel_path_file in download_dict:
-                                download_dict[rel_path_file].append(word.decode('utf-8'))
-                            else:
-                                download_dict[rel_path_file] = [word.decode('utf-8')]
-        except Exception as ex:
-            msg = str(ex)
-            logger.warning(f"Failed to extract SPDX download location. {rel_path_file}, {msg}")
-    return download_dict