fosslight-source 2.3.0__tar.gz → 2.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {fosslight_source-2.3.0/src/fosslight_source.egg-info → fosslight_source-2.3.1}/PKG-INFO +1 -1
  2. {fosslight_source-2.3.0 → fosslight_source-2.3.1}/pyproject.toml +1 -1
  3. {fosslight_source-2.3.0 → fosslight_source-2.3.1}/src/fosslight_source/cli.py +113 -71
  4. {fosslight_source-2.3.0 → fosslight_source-2.3.1}/src/fosslight_source/run_scancode.py +11 -2
  5. {fosslight_source-2.3.0 → fosslight_source-2.3.1/src/fosslight_source.egg-info}/PKG-INFO +1 -1
  6. {fosslight_source-2.3.0 → fosslight_source-2.3.1}/tests/test_tox.py +65 -2
  7. {fosslight_source-2.3.0 → fosslight_source-2.3.1}/LICENSE +0 -0
  8. {fosslight_source-2.3.0 → fosslight_source-2.3.1}/MANIFEST.in +0 -0
  9. {fosslight_source-2.3.0 → fosslight_source-2.3.1}/README.md +0 -0
  10. {fosslight_source-2.3.0 → fosslight_source-2.3.1}/setup.cfg +0 -0
  11. {fosslight_source-2.3.0 → fosslight_source-2.3.1}/src/fosslight_source/__init__.py +0 -0
  12. {fosslight_source-2.3.0 → fosslight_source-2.3.1}/src/fosslight_source/_help.py +0 -0
  13. {fosslight_source-2.3.0 → fosslight_source-2.3.1}/src/fosslight_source/_kb_client.py +0 -0
  14. {fosslight_source-2.3.0 → fosslight_source-2.3.1}/src/fosslight_source/_license_matched.py +0 -0
  15. {fosslight_source-2.3.0 → fosslight_source-2.3.1}/src/fosslight_source/_parsing_scancode_file_item.py +0 -0
  16. {fosslight_source-2.3.0 → fosslight_source-2.3.1}/src/fosslight_source/_parsing_scanoss_file.py +0 -0
  17. {fosslight_source-2.3.0 → fosslight_source-2.3.1}/src/fosslight_source/_scan_item.py +0 -0
  18. {fosslight_source-2.3.0 → fosslight_source-2.3.1}/src/fosslight_source/run_manifest_extractor.py +0 -0
  19. {fosslight_source-2.3.0 → fosslight_source-2.3.1}/src/fosslight_source/run_scanoss.py +0 -0
  20. {fosslight_source-2.3.0 → fosslight_source-2.3.1}/src/fosslight_source/run_spdx_extractor.py +0 -0
  21. {fosslight_source-2.3.0 → fosslight_source-2.3.1}/src/fosslight_source.egg-info/SOURCES.txt +0 -0
  22. {fosslight_source-2.3.0 → fosslight_source-2.3.1}/src/fosslight_source.egg-info/dependency_links.txt +0 -0
  23. {fosslight_source-2.3.0 → fosslight_source-2.3.1}/src/fosslight_source.egg-info/entry_points.txt +0 -0
  24. {fosslight_source-2.3.0 → fosslight_source-2.3.1}/src/fosslight_source.egg-info/requires.txt +0 -0
  25. {fosslight_source-2.3.0 → fosslight_source-2.3.1}/src/fosslight_source.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fosslight_source
3
- Version: 2.3.0
3
+ Version: 2.3.1
4
4
  Summary: FOSSLight Source Scanner
5
5
  Author: LG Electronics
6
6
  License-Expression: Apache-2.0
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
7
7
 
8
8
  [project]
9
9
  name = "fosslight_source"
10
- version = "2.3.0"
10
+ version = "2.3.1"
11
11
  description = "FOSSLight Source Scanner"
12
12
  readme = "README.md"
13
13
  license = "Apache-2.0"
@@ -338,13 +338,19 @@ def _collect_kb_file_hashes(
338
338
  excluded_files: set,
339
339
  hide_progress: bool,
340
340
  ) -> tuple[list[str], list[tuple[SourceItem, str]]]:
341
- """Collect MD5 hashes from scancode results and walk targets, plus (extra_item, md5) candidates."""
341
+ """Collect MD5 hashes from scancode results and walk targets, plus (extra_item, md5) candidates.
342
+
343
+ Skips license/notice files and scancode_result items that already have download_location.
344
+ ScanOSS/SPDX results are merged into scancode_result before this runs.
345
+ """
342
346
  file_hashes: list[str] = []
343
347
  extra_candidates: list[tuple[SourceItem, str]] = []
344
348
 
345
349
  for item in scancode_result:
346
350
  if item.is_license_text or is_notice_file(item.source_name_or_path):
347
351
  continue
352
+ if item.download_location:
353
+ continue
348
354
  md5_hash, _wfp = item._get_hash(path_to_scan)
349
355
  if md5_hash:
350
356
  item._cached_kb_md5 = md5_hash
@@ -453,6 +459,32 @@ def merge_results(
453
459
  return scancode_result, kb_status_message, kb_requested_count, kb_returned_count
454
460
 
455
461
 
462
+ def _finalize_temp_output(
463
+ temp_output_path: str,
464
+ final_output_path: str,
465
+ publish: bool,
466
+ log: Optional[logging.Logger] = None,
467
+ ) -> bool:
468
+ """Copy scan artifacts from temp dir, then always remove the temp directory."""
469
+ if not temp_output_path or not os.path.isdir(temp_output_path):
470
+ return True
471
+ publish_ok = True
472
+ try:
473
+ if publish:
474
+ shutil.copytree(temp_output_path, final_output_path, dirs_exist_ok=True)
475
+ except Exception as ex:
476
+ publish_ok = False
477
+ if log:
478
+ log.error(f"Failed to publish scan artifacts: {ex}")
479
+ finally:
480
+ try:
481
+ shutil.rmtree(temp_output_path)
482
+ except Exception as ex:
483
+ if log:
484
+ log.debug(f"Failed to cleanup temp output directory: {ex}")
485
+ return publish_ok
486
+
487
+
456
488
  def run_scanners(
457
489
  path_to_scan: str, output_file_name: str = "",
458
490
  write_json_file: bool = False, num_cores: int = -1,
@@ -498,84 +530,94 @@ def run_scanners(
498
530
  output_path = os.getcwd()
499
531
  final_output_path = output_path
500
532
  output_path = os.path.join(os.path.dirname(output_path), f'.fosslight_temp_{start_time}')
533
+ publish_temp_output = False
534
+ logger = None
535
+ publish_ok = True
501
536
 
502
- logger, result_log = init_log(os.path.join(output_path, f"fosslight_log_src_{start_time}.txt"),
503
- True, logging.INFO, logging.DEBUG, PKG_NAME, path_to_scan, path_to_exclude)
537
+ try:
538
+ logger, result_log = init_log(os.path.join(output_path, f"fosslight_log_src_{start_time}.txt"),
539
+ True, logging.INFO, logging.DEBUG, PKG_NAME, path_to_scan, path_to_exclude)
504
540
 
505
- logger.info(f"Tool Info : {result_log['Tool Info']}")
541
+ logger.info(f"Tool Info : {result_log['Tool Info']}")
506
542
 
507
- if '.xlsx' not in output_extensions and print_matched_text:
508
- logger.warning("-m option is only available for excel.")
509
- print_matched_text = False
543
+ if '.xlsx' not in output_extensions and print_matched_text:
544
+ logger.warning("-m option is only available for excel.")
545
+ print_matched_text = False
510
546
 
511
- if success:
512
- if all_exclude_mode and len(all_exclude_mode) == 4:
513
- (excluded_path_with_default_exclusion,
514
- excluded_path_without_dot,
515
- excluded_files,
516
- cnt_file_except_skipped) = all_exclude_mode
517
- else:
518
- path_to_exclude_with_filename = path_to_exclude
519
- (excluded_path_with_default_exclusion,
520
- excluded_path_without_dot,
521
- excluded_files,
522
- cnt_file_except_skipped) = get_excluded_paths(path_to_scan, path_to_exclude_with_filename)
523
- logger.debug(f"Skipped paths: {excluded_path_with_default_exclusion}")
524
-
525
- if not selected_scanner:
526
- selected_scanner = ALL_MODE
527
- if selected_scanner in ['scancode', ALL_MODE]:
528
- success, result_log[RESULT_KEY], scancode_result, license_list = run_scan(path_to_scan, output_file_name,
529
- write_json_file, num_cores, True,
530
- print_matched_text, formats, called_by_cli,
531
- time_out, correct_mode, correct_filepath,
532
- excluded_path_with_default_exclusion,
533
- excluded_files, hide_progress)
534
- excluded_files = set(excluded_files) if excluded_files else set()
535
- if selected_scanner in ['scanoss', ALL_MODE]:
536
- scanoss_result, api_limit_exceed = run_scanoss_py(path_to_scan, output_path, formats, True, num_cores,
537
- excluded_path_with_default_exclusion, excluded_files,
538
- write_json_file, hide_progress)
539
-
540
- run_kb_msg = ""
541
- if selected_scanner in SCANNER_TYPE:
542
- run_kb = True if selected_scanner in ['kb', ALL_MODE] else False
543
- if run_kb:
544
- if not check_kb_server_reachable(kb_url, kb_token):
545
- run_kb = False
546
- run_kb_msg = f"KB({kb_url}) Unreachable"
547
-
548
- spdx_downloads, manifest_licenses = metadata_collector(path_to_scan, excluded_files)
549
- merged_result, kb_status_message, kb_requested_count, kb_returned_count = merge_results(
550
- scancode_result, scanoss_result, spdx_downloads,
551
- path_to_scan, run_kb, manifest_licenses, excluded_files,
552
- hide_progress, kb_url, kb_token,
553
- )
554
- if kb_status_message:
555
- run_kb_msg = f"KB({kb_url}) {kb_status_message}"
556
- elif run_kb and kb_requested_count > 0:
557
- run_kb_msg = (
558
- f"KB({kb_url}) response : {kb_returned_count}/"
559
- f" requested: {kb_requested_count}"
547
+ if success:
548
+ if all_exclude_mode and len(all_exclude_mode) == 4:
549
+ (excluded_path_with_default_exclusion,
550
+ excluded_path_without_dot,
551
+ excluded_files,
552
+ cnt_file_except_skipped) = all_exclude_mode
553
+ else:
554
+ path_to_exclude_with_filename = path_to_exclude
555
+ (excluded_path_with_default_exclusion,
556
+ excluded_path_without_dot,
557
+ excluded_files,
558
+ cnt_file_except_skipped) = get_excluded_paths(path_to_scan, path_to_exclude_with_filename)
559
+ logger.debug(f"Skipped paths: {excluded_path_with_default_exclusion}")
560
+
561
+ if not selected_scanner:
562
+ selected_scanner = ALL_MODE
563
+ if selected_scanner in ['scancode', ALL_MODE]:
564
+ success, result_log[RESULT_KEY], scancode_result, license_list = run_scan(
565
+ path_to_scan, output_file_name, write_json_file, num_cores, True,
566
+ print_matched_text, formats, called_by_cli, time_out, correct_mode,
567
+ correct_filepath, excluded_path_with_default_exclusion,
568
+ excluded_files, hide_progress,
569
+ )
570
+ excluded_files = set(excluded_files) if excluded_files else set()
571
+ if selected_scanner in ['scanoss', ALL_MODE]:
572
+ scanoss_result, api_limit_exceed = run_scanoss_py(path_to_scan, output_path, formats, True, num_cores,
573
+ excluded_path_with_default_exclusion, excluded_files,
574
+ write_json_file, hide_progress)
575
+
576
+ run_kb_msg = ""
577
+ if selected_scanner in SCANNER_TYPE:
578
+ run_kb = True if selected_scanner in ['kb', ALL_MODE] else False
579
+ if run_kb:
580
+ if not check_kb_server_reachable(kb_url, kb_token):
581
+ run_kb = False
582
+ run_kb_msg = f"KB({kb_url}) Unreachable"
583
+
584
+ spdx_downloads, manifest_licenses = metadata_collector(path_to_scan, excluded_files)
585
+ merged_result, kb_status_message, kb_requested_count, kb_returned_count = merge_results(
586
+ scancode_result, scanoss_result, spdx_downloads,
587
+ path_to_scan, run_kb, manifest_licenses, excluded_files,
588
+ hide_progress, kb_url, kb_token,
560
589
  )
561
- mark_oss_info_correction_files_as_excluded(merged_result)
562
- scan_item = create_report_file(start_time, merged_result, license_list, scanoss_result, selected_scanner,
563
- print_matched_text, output_path, output_files, output_extensions, correct_mode,
564
- correct_filepath, path_to_scan, excluded_path_without_dot, formats,
565
- api_limit_exceed, cnt_file_except_skipped, final_output_path, run_kb_msg)
590
+ if kb_status_message:
591
+ run_kb_msg = f"KB({kb_url}) {kb_status_message}"
592
+ elif run_kb and kb_requested_count > 0:
593
+ run_kb_msg = (
594
+ f"KB({kb_url}) response : {kb_returned_count}/"
595
+ f" requested: {kb_requested_count}"
596
+ )
597
+ mark_oss_info_correction_files_as_excluded(merged_result)
598
+ scan_item = create_report_file(start_time, merged_result, license_list, scanoss_result, selected_scanner,
599
+ print_matched_text, output_path, output_files, output_extensions, correct_mode,
600
+ correct_filepath, path_to_scan, excluded_path_without_dot, formats,
601
+ api_limit_exceed, cnt_file_except_skipped, final_output_path, run_kb_msg)
602
+ else:
603
+ print_help_msg_source_scanner()
604
+ result_log[RESULT_KEY] = "Unsupported scanner"
605
+ success = False
566
606
  else:
567
- print_help_msg_source_scanner()
568
- result_log[RESULT_KEY] = "Unsupported scanner"
607
+ result_log[RESULT_KEY] = f"Format error. {msg}"
569
608
  success = False
570
- else:
571
- result_log[RESULT_KEY] = f"Format error. {msg}"
572
- success = False
573
609
 
574
- try:
575
- shutil.copytree(output_path, final_output_path, dirs_exist_ok=True)
576
- shutil.rmtree(output_path)
577
- except Exception as ex:
578
- logger.debug(f"Failed to move temp files: {ex}")
610
+ publish_temp_output = True
611
+ finally:
612
+ publish_ok = _finalize_temp_output(output_path, final_output_path, publish_temp_output, logger)
613
+
614
+ if publish_temp_output and not publish_ok:
615
+ success = False
616
+ prev_msg = result_log.get(RESULT_KEY, "")
617
+ result_log[RESULT_KEY] = (
618
+ f"{prev_msg}, Failed to publish scan artifacts" if prev_msg
619
+ else "Failed to publish scan artifacts"
620
+ )
579
621
 
580
622
  return success, result_log.get(RESULT_KEY, ""), scan_item, license_list, scanoss_result
581
623
 
@@ -63,14 +63,23 @@ def _apply_scancode_unset_workaround(kwargs: dict) -> None:
63
63
  logger.debug("scancode UNSET workaround skipped: %s", ex)
64
64
 
65
65
 
66
+ def _directory_ignore_pattern(dir_name: str) -> str:
67
+ """Path-based glob for a directory name (avoids matching the scan root itself)."""
68
+ normalized = dir_name.strip().strip("/").replace("\\", "/")
69
+ if not normalized:
70
+ return dir_name
71
+ return f"**/{normalized}/**"
72
+
73
+
66
74
  def _default_scancode_coarse_ignore_patterns() -> frozenset:
67
75
  """
68
76
  Coarse ignore patterns aligned with fosslight_util.get_excluded_paths() rules.
69
- Uses segment-style globs so scancode does not need one pattern per file.
77
+ Directory names use path-based globs (e.g. **/tests/**) so they do not match
78
+ the scan root directory name itself.
70
79
  """
71
80
  patterns = {".*"}
72
81
  for name in PACKAGE_DIRECTORY + EXCLUDE_DIRECTORY:
73
- patterns.add(name)
82
+ patterns.add(_directory_ignore_pattern(name))
74
83
  for ext in EXCLUDE_FILE_EXTENSION:
75
84
  patterns.add(f"*.{ext}")
76
85
  for name in EXCLUDE_FILENAME:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fosslight_source
3
- Version: 2.3.0
3
+ Version: 2.3.1
4
4
  Summary: FOSSLight Source Scanner
5
5
  Author: LG Electronics
6
6
  License-Expression: Apache-2.0
@@ -3,10 +3,13 @@
3
3
  # Copyright (c) 2020 LG Electronics Inc.
4
4
  # SPDX-License-Identifier: Apache-2.0
5
5
  import os
6
+ import shlex
6
7
  import subprocess
7
8
  import pytest
8
9
  import shutil
9
10
  import sys
11
+ import csv
12
+ import glob
10
13
 
11
14
  # Add project root to sys.path for importing FL Source modules
12
15
  sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
@@ -18,6 +21,26 @@ from fosslight_source._parsing_scancode_file_item import (
18
21
  )
19
22
 
20
23
  remove_directories = ["test_scan", "test_scan2", "test_scan3"]
24
+ TEST_FILES_SCAN_DIR = "test_scan"
25
+
26
+
27
+ def _parse_license_tokens(license_value: str) -> set[str]:
28
+ return {token.strip().lower() for token in (license_value or "").split(",") if token.strip()}
29
+
30
+
31
+ def _read_src_csv_rows(csv_path: str) -> list[dict]:
32
+ with open(csv_path, "r", encoding="utf-8") as file:
33
+ return list(csv.DictReader(file, delimiter="\t"))
34
+
35
+
36
+ def _rows_for_source(rows: list[dict], source_name: str) -> list[dict]:
37
+ return [row for row in rows if row.get("Source Path") == source_name]
38
+
39
+
40
+ def _find_scan_csv(output_dir: str) -> str:
41
+ csv_files = sorted(glob.glob(os.path.join(output_dir, "*.csv")))
42
+ assert csv_files, f"No CSV report found under {output_dir}"
43
+ return csv_files[-1]
21
44
 
22
45
 
23
46
  @pytest.fixture(scope="module", autouse=True)
@@ -31,8 +54,22 @@ def setup_test_result_dir():
31
54
 
32
55
 
33
56
  def run_command(command):
34
- process = subprocess.run(command, shell=True, capture_output=True, text=True)
35
- success = (process.returncode == 0)
57
+ command = command.strip()
58
+ if command.startswith("fosslight_source"):
59
+ args = shlex.split(command, posix=(os.name != "nt"))[1:]
60
+ if os.environ.get("FOSSLIGHT_USE_LOCAL_SRC"):
61
+ src_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src"))
62
+ env = os.environ.copy()
63
+ existing = env.get("PYTHONPATH", "")
64
+ env["PYTHONPATH"] = src_path if not existing else f"{src_path}{os.pathsep}{existing}"
65
+ cmd = [sys.executable, "-m", "fosslight_source.cli", *args]
66
+ process = subprocess.run(cmd, capture_output=True, text=True, env=env)
67
+ else:
68
+ cmd = ["fosslight_source", *args]
69
+ process = subprocess.run(cmd, capture_output=True, text=True)
70
+ else:
71
+ process = subprocess.run(command, shell=True, capture_output=True, text=True)
72
+ success = process.returncode == 0
36
73
  return success, process.stdout if success else process.stderr
37
74
 
38
75
 
@@ -112,6 +149,32 @@ def test_run():
112
149
  assert len(scan2_files) > 0, "Test Run: No scan files created in test_scan2 directory"
113
150
 
114
151
 
152
+ def test_test_files_scan_results():
153
+ os.makedirs(TEST_FILES_SCAN_DIR, exist_ok=True)
154
+
155
+ success, msg = run_command(
156
+ f"fosslight_source -p tests/test_files -s scancode -f csv -o {TEST_FILES_SCAN_DIR}/"
157
+ )
158
+ assert success is True, f"Test Run: test_files scan failed: {msg}"
159
+
160
+ csv_path = _find_scan_csv(TEST_FILES_SCAN_DIR)
161
+ rows = _read_src_csv_rows(csv_path)
162
+
163
+ sample_rows = _rows_for_source(rows, "sample.cpp")
164
+ assert sample_rows, "Test Run: sample.cpp not found in scan result"
165
+ for row in sample_rows:
166
+ licenses = _parse_license_tokens(row.get("License", ""))
167
+ assert "apache-2.0" in licenses, f"sample.cpp missing Apache-2.0 license: {row.get('License')}"
168
+ assert "mit" in licenses, f"sample.cpp missing MIT license: {row.get('License')}"
169
+
170
+ temp_rows = _rows_for_source(rows, "temp.cpp")
171
+ assert temp_rows, "Test Run: temp.cpp not found in scan result"
172
+ temp_row = temp_rows[0]
173
+ temp_licenses = _parse_license_tokens(temp_row.get("License", ""))
174
+ assert "apache-2.0" in temp_licenses, f"temp.cpp missing Apache-2.0 license: {temp_row.get('License')}"
175
+ assert (temp_row.get("Copyright Text") or "").strip(), "Test Run: temp.cpp copyright not extracted"
176
+
177
+
115
178
  def test_help_command():
116
179
  success, msg = run_command("fosslight_source -h")
117
180
  assert success is True, f"Test Release: Help command failed :{msg}"