fosslight-source 2.2.13__tar.gz → 2.2.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {fosslight_source-2.2.13/src/fosslight_source.egg-info → fosslight_source-2.2.15}/PKG-INFO +3 -2
  2. {fosslight_source-2.2.13 → fosslight_source-2.2.15}/pyproject.toml +3 -2
  3. {fosslight_source-2.2.13 → fosslight_source-2.2.15}/src/fosslight_source/_help.py +1 -0
  4. {fosslight_source-2.2.13 → fosslight_source-2.2.15}/src/fosslight_source/_scan_item.py +9 -1
  5. {fosslight_source-2.2.13 → fosslight_source-2.2.15}/src/fosslight_source/cli.py +29 -22
  6. {fosslight_source-2.2.13 → fosslight_source-2.2.15}/src/fosslight_source/run_manifest_extractor.py +49 -0
  7. {fosslight_source-2.2.13 → fosslight_source-2.2.15}/src/fosslight_source/run_scancode.py +21 -4
  8. {fosslight_source-2.2.13 → fosslight_source-2.2.15}/src/fosslight_source/run_scanoss.py +3 -2
  9. {fosslight_source-2.2.13 → fosslight_source-2.2.15/src/fosslight_source.egg-info}/PKG-INFO +3 -2
  10. {fosslight_source-2.2.13 → fosslight_source-2.2.15}/src/fosslight_source.egg-info/requires.txt +2 -1
  11. {fosslight_source-2.2.13 → fosslight_source-2.2.15}/LICENSE +0 -0
  12. {fosslight_source-2.2.13 → fosslight_source-2.2.15}/MANIFEST.in +0 -0
  13. {fosslight_source-2.2.13 → fosslight_source-2.2.15}/README.md +0 -0
  14. {fosslight_source-2.2.13 → fosslight_source-2.2.15}/setup.cfg +0 -0
  15. {fosslight_source-2.2.13 → fosslight_source-2.2.15}/src/fosslight_source/__init__.py +0 -0
  16. {fosslight_source-2.2.13 → fosslight_source-2.2.15}/src/fosslight_source/_license_matched.py +0 -0
  17. {fosslight_source-2.2.13 → fosslight_source-2.2.15}/src/fosslight_source/_parsing_scancode_file_item.py +0 -0
  18. {fosslight_source-2.2.13 → fosslight_source-2.2.15}/src/fosslight_source/_parsing_scanoss_file.py +0 -0
  19. {fosslight_source-2.2.13 → fosslight_source-2.2.15}/src/fosslight_source/run_spdx_extractor.py +0 -0
  20. {fosslight_source-2.2.13 → fosslight_source-2.2.15}/src/fosslight_source.egg-info/SOURCES.txt +0 -0
  21. {fosslight_source-2.2.13 → fosslight_source-2.2.15}/src/fosslight_source.egg-info/dependency_links.txt +0 -0
  22. {fosslight_source-2.2.13 → fosslight_source-2.2.15}/src/fosslight_source.egg-info/entry_points.txt +0 -0
  23. {fosslight_source-2.2.13 → fosslight_source-2.2.15}/src/fosslight_source.egg-info/top_level.txt +0 -0
  24. {fosslight_source-2.2.13 → fosslight_source-2.2.15}/tests/test_tox.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fosslight_source
3
- Version: 2.2.13
3
+ Version: 2.2.15
4
4
  Summary: FOSSLight Source Scanner
5
5
  Author: LG Electronics
6
6
  License-Expression: Apache-2.0
@@ -24,11 +24,12 @@ Requires-Dist: fosslight_util>=2.1.37
24
24
  Requires-Dist: PyYAML
25
25
  Requires-Dist: wheel>=0.38.1
26
26
  Requires-Dist: intbitset
27
- Requires-Dist: fosslight_binary>=5.0.0
27
+ Requires-Dist: fosslight_binary>=5.1.22
28
28
  Requires-Dist: scancode-toolkit>=32.0.2
29
29
  Requires-Dist: fingerprints==1.2.3
30
30
  Requires-Dist: normality==2.6.1
31
31
  Requires-Dist: psycopg2-binary>=2.9.10; python_version >= "3.13"
32
+ Requires-Dist: tqdm
32
33
  Dynamic: license-file
33
34
 
34
35
  <!--
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
7
7
 
8
8
  [project]
9
9
  name = "fosslight_source"
10
- version = "2.2.13"
10
+ version = "2.2.15"
11
11
  description = "FOSSLight Source Scanner"
12
12
  readme = "README.md"
13
13
  license = "Apache-2.0"
@@ -33,12 +33,13 @@ dependencies = [
33
33
  "PyYAML",
34
34
  "wheel>=0.38.1",
35
35
  "intbitset",
36
- "fosslight_binary>=5.0.0",
36
+ "fosslight_binary>=5.1.22",
37
37
  "scancode-toolkit>=32.0.2",
38
38
  "fingerprints==1.2.3",
39
39
  "normality==2.6.1",
40
40
  # Python 3.13+ needs psycopg2-binary 2.9.10+ (has wheels; 2.9.9 builds fail with _PyInterpreterState_Get)
41
41
  "psycopg2-binary>=2.9.10; python_version >= '3.13'",
42
+ "tqdm",
42
43
  ]
43
44
 
44
45
  [project.optional-dependencies]
@@ -41,6 +41,7 @@ _HELP_MESSAGE_SOURCE_SCANNER = f"""
41
41
  -j Generate raw scanner results in JSON format
42
42
  --no_correction Skip OSS information correction with sbom-info.yaml
43
43
  --correct_fpath <path> Path to custom sbom-info.yaml file
44
+ --hide_progress Hide the progress bar during scanning
44
45
 
45
46
  💡 Examples
46
47
  ────────────────────────────────────────────────────────────────────
@@ -19,7 +19,15 @@ replace_word = ["-only", "-old-style", "-or-later", "licenseref-scancode-", "lic
19
19
  _notice_filename = ['licen[cs]e[s]?', 'notice[s]?', 'legal', 'copyright[s]?', 'copying*', 'patent[s]?', 'unlicen[cs]e', 'eula',
20
20
  '[a,l]?gpl[-]?[1-3]?[.,-,_]?[0-1]?', 'mit', 'bsd[-]?[0-4]?', 'bsd[-]?[0-4][-]?clause[s]?',
21
21
  'apache[-,_]?[1-2]?[.,-,_]?[0-2]?']
22
- _manifest_filename = [r'.*\.pom$', r'package\.json$', r'setup\.py$', r'setup\.cfg$', r'.*\.podspec$', r'Cargo\.toml$']
22
+ _manifest_filename = [
23
+ r'.*\.pom$',
24
+ r'package\.json$',
25
+ r'setup\.py$',
26
+ r'setup\.cfg$',
27
+ r'.*\.podspec$',
28
+ r'Cargo\.toml$',
29
+ r'huggingface_hub_metadata\.json$',
30
+ ]
23
31
  MAX_LICENSE_LENGTH = 200
24
32
  MAX_LICENSE_TOTAL_LENGTH = 600
25
33
  SUBSTRING_LICENSE_COMMENT = "Maximum character limit (License)"
@@ -14,7 +14,6 @@ import urllib.error
14
14
  from datetime import datetime
15
15
  import fosslight_util.constant as constant
16
16
  from fosslight_util.set_log import init_log
17
- from fosslight_util.timer_thread import TimerThread
18
17
  from ._help import print_version, print_help_msg_source_scanner
19
18
  from ._license_matched import get_license_list_to_print
20
19
  from fosslight_util.output_format import check_output_formats_v2, write_output_file
@@ -81,6 +80,7 @@ def main() -> None:
81
80
  parser.add_argument('-e', '--exclude', nargs='*', required=False, default=[])
82
81
  parser.add_argument('--no_correction', action='store_true', required=False)
83
82
  parser.add_argument('--correct_fpath', nargs=1, type=str, required=False)
83
+ parser.add_argument('--hide_progress', action='store_true', required=False)
84
84
 
85
85
  args = parser.parse_args()
86
86
 
@@ -108,19 +108,16 @@ def main() -> None:
108
108
  correct_filepath = path_to_scan
109
109
  if args.correct_fpath:
110
110
  correct_filepath = ''.join(args.correct_fpath)
111
+ hide_progress = args.hide_progress
111
112
 
112
113
  time_out = args.timeout
113
114
  core = args.cores
114
115
 
115
- timer = TimerThread()
116
- timer.setDaemon(True)
117
- timer.start()
118
-
119
116
  if os.path.isdir(path_to_scan):
120
117
  result = []
121
118
  result = run_scanners(path_to_scan, output_file_name, write_json_file, core, True,
122
119
  print_matched_text, formats, time_out, correct_mode, correct_filepath,
123
- selected_scanner, path_to_exclude)
120
+ selected_scanner, path_to_exclude, hide_progress=hide_progress)
124
121
 
125
122
  _result_log["Scan Result"] = result[1]
126
123
 
@@ -318,7 +315,7 @@ def get_kb_reference_to_print(merged_result: list) -> list:
318
315
  def merge_results(
319
316
  scancode_result: list = [], scanoss_result: list = [], spdx_downloads: dict = {},
320
317
  path_to_scan: str = "", run_kb: bool = False, manifest_licenses: dict = {},
321
- excluded_files: set = None
318
+ excluded_files: set = None, hide_progress: bool = False
322
319
  ) -> list:
323
320
 
324
321
  """
@@ -349,15 +346,18 @@ def merge_results(
349
346
  scancode_result.append(new_result_item)
350
347
  if manifest_licenses:
351
348
  for file_name, licenses in manifest_licenses.items():
349
+ valid_licenses = [lic.strip() for lic in licenses if isinstance(lic, str) and lic.strip()]
350
+ if not valid_licenses:
351
+ continue
352
352
  if file_name in scancode_result:
353
353
  merged_result_item = scancode_result[scancode_result.index(file_name)]
354
354
  # overwrite existing detected licenses with manifest-provided licenses
355
355
  merged_result_item.licenses = [] # clear existing licenses (setter clears when value falsy)
356
- merged_result_item.licenses = licenses
356
+ merged_result_item.licenses = valid_licenses
357
357
  merged_result_item.is_manifest_file = True
358
358
  else:
359
359
  new_result_item = SourceItem(file_name)
360
- new_result_item.licenses = licenses
360
+ new_result_item.licenses = valid_licenses
361
361
  new_result_item.is_manifest_file = True
362
362
  scancode_result.append(new_result_item)
363
363
 
@@ -367,19 +367,24 @@ def merge_results(
367
367
  # Add OSSItem for files in path_to_scan that are not in scancode_result
368
368
  # when KB returns an origin URL for their MD5 hash (skip excluded_files)
369
369
  if run_kb:
370
+ import tqdm
370
371
  abs_path_to_scan = os.path.abspath(path_to_scan)
371
372
  scancode_paths = {item.source_name_or_path for item in scancode_result}
373
+
374
+ files_to_scan = []
372
375
  for root, _dirs, files in os.walk(path_to_scan):
373
376
  for file in files:
374
- file_path = os.path.join(root, file)
375
- rel_path = os.path.relpath(file_path, abs_path_to_scan).replace("\\", "/")
376
- if rel_path in scancode_paths or rel_path in excluded_files:
377
- continue
378
- extra_item = SourceItem(rel_path)
379
- extra_item.set_oss_item(path_to_scan, run_kb)
380
- if extra_item.download_location:
381
- scancode_result.append(extra_item)
382
- scancode_paths.add(rel_path)
377
+ files_to_scan.append(os.path.join(root, file))
378
+
379
+ for file_path in tqdm.tqdm(files_to_scan, desc="KB Scanning", disable=hide_progress):
380
+ rel_path = os.path.relpath(file_path, abs_path_to_scan).replace("\\", "/")
381
+ if rel_path in scancode_paths or rel_path in excluded_files:
382
+ continue
383
+ extra_item = SourceItem(rel_path)
384
+ extra_item.set_oss_item(path_to_scan, run_kb)
385
+ if extra_item.download_location:
386
+ scancode_result.append(extra_item)
387
+ scancode_paths.add(rel_path)
383
388
 
384
389
  return scancode_result
385
390
 
@@ -391,7 +396,7 @@ def run_scanners(
391
396
  formats: list = [], time_out: int = 120,
392
397
  correct_mode: bool = True, correct_filepath: str = "",
393
398
  selected_scanner: str = ALL_MODE, path_to_exclude: list = [],
394
- all_exclude_mode: tuple = ()
399
+ all_exclude_mode: tuple = (), hide_progress: bool = False
395
400
  ) -> Tuple[bool, str, 'ScannerItem', list, list]:
396
401
  """
397
402
  Run Scancode and scanoss.py for the given path.
@@ -430,6 +435,8 @@ def run_scanners(
430
435
  logger, result_log = init_log(os.path.join(output_path, f"fosslight_log_src_{start_time}.txt"),
431
436
  True, logging.INFO, logging.DEBUG, PKG_NAME, path_to_scan, path_to_exclude)
432
437
 
438
+ logger.info(f"Tool Info : {result_log['Tool Info']}")
439
+
433
440
  if '.xlsx' not in output_extensions and print_matched_text:
434
441
  logger.warning("-m option is only available for excel.")
435
442
  print_matched_text = False
@@ -456,12 +463,12 @@ def run_scanners(
456
463
  print_matched_text, formats, called_by_cli,
457
464
  time_out, correct_mode, correct_filepath,
458
465
  excluded_path_with_default_exclusion,
459
- excluded_files)
466
+ excluded_files, hide_progress)
460
467
  excluded_files = set(excluded_files) if excluded_files else set()
461
468
  if selected_scanner in ['scanoss', ALL_MODE]:
462
469
  scanoss_result, api_limit_exceed = run_scanoss_py(path_to_scan, output_path, formats, True, num_cores,
463
470
  excluded_path_with_default_exclusion, excluded_files,
464
- write_json_file)
471
+ write_json_file, hide_progress)
465
472
 
466
473
  run_kb_msg = ""
467
474
  if selected_scanner in SCANNER_TYPE:
@@ -475,7 +482,7 @@ def run_scanners(
475
482
 
476
483
  spdx_downloads, manifest_licenses = metadata_collector(path_to_scan, excluded_files)
477
484
  merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads,
478
- path_to_scan, run_kb, manifest_licenses, excluded_files)
485
+ path_to_scan, run_kb, manifest_licenses, excluded_files, hide_progress)
479
486
  scan_item = create_report_file(start_time, merged_result, license_list, scanoss_result, selected_scanner,
480
487
  print_matched_text, output_path, output_files, output_extensions, correct_mode,
481
488
  correct_filepath, path_to_scan, excluded_path_without_dot, formats,
@@ -207,6 +207,49 @@ def get_licenses_from_cargo_toml(file_path: str) -> list[str]:
207
207
  return []
208
208
 
209
209
 
210
+ def get_licenses_from_huggingface_metadata(file_path: str) -> list[str]:
211
+ try:
212
+ with open(file_path, 'r', encoding='utf-8') as f:
213
+ data = json.load(f)
214
+ except Exception as ex:
215
+ logger.info(f"Failed to read huggingface_hub_metadata.json {file_path}: {ex}")
216
+ return []
217
+
218
+ if not isinstance(data, dict):
219
+ return []
220
+
221
+ licenses: list[str] = []
222
+
223
+ def append_license(value):
224
+ if isinstance(value, str):
225
+ token = value.strip()
226
+ if token and token not in licenses:
227
+ licenses.append(token)
228
+ elif isinstance(value, list):
229
+ for item in value:
230
+ append_license(item)
231
+
232
+ # Hugging Face model API commonly returns top-level `license`
233
+ append_license(data.get('license'))
234
+
235
+ # Some metadata may include cardData/license variants
236
+ card_data = data.get('cardData')
237
+ if isinstance(card_data, dict):
238
+ append_license(card_data.get('license'))
239
+ append_license(card_data.get('licenses'))
240
+
241
+ # Many Hub API responses expose license only via tags, e.g. "license:apache-2.0".
242
+ tags = data.get('tags')
243
+ if isinstance(tags, list):
244
+ for tag in tags:
245
+ if isinstance(tag, str):
246
+ prefix = 'license:'
247
+ if tag.lower().startswith(prefix):
248
+ append_license(tag[len(prefix):].strip())
249
+
250
+ return licenses
251
+
252
+
210
253
  def get_manifest_licenses(file_path: str) -> list[str]:
211
254
  if file_path.endswith('.pom'):
212
255
  try:
@@ -247,3 +290,9 @@ def get_manifest_licenses(file_path: str) -> list[str]:
247
290
  except Exception as ex:
248
291
  logger.info(f"Failed to extract license from Cargo.toml {file_path}: {ex}")
249
292
  return []
293
+ elif os.path.basename(file_path).lower() == 'huggingface_hub_metadata.json':
294
+ try:
295
+ return get_licenses_from_huggingface_metadata(file_path)
296
+ except Exception as ex:
297
+ logger.info(f"Failed to extract license from huggingface_hub_metadata.json {file_path}: {ex}")
298
+ return []
@@ -63,7 +63,7 @@ def run_scan(
63
63
  formats: list = [], called_by_cli: bool = False,
64
64
  time_out: int = 120, correct_mode: bool = True,
65
65
  correct_filepath: str = "", path_to_exclude: list = [],
66
- excluded_files: list = []
66
+ excluded_files: list = [], hide_progress: bool = False
67
67
  ) -> Tuple[bool, str, list, list]:
68
68
  if not called_by_cli:
69
69
  global logger
@@ -103,6 +103,9 @@ def run_scan(
103
103
  if not called_by_cli:
104
104
  logger, _result_log = init_log(os.path.join(output_path, f"fosslight_log_src_{_start_time}.txt"),
105
105
  True, logging.INFO, logging.DEBUG, _PKG_NAME, path_to_scan, path_to_exclude)
106
+
107
+ logger.info(f"Tool Info : {_result_log['Tool Info']}")
108
+
106
109
  num_cores = multiprocessing.cpu_count() - 1 if num_cores < 0 else num_cores
107
110
 
108
111
  if os.path.isdir(path_to_scan):
@@ -113,8 +116,8 @@ def run_scan(
113
116
  pretty_params["path_to_exclude"] = path_to_exclude
114
117
  pretty_params["output_file"] = output_file_name
115
118
  total_files_to_excluded = []
119
+ abs_path_to_scan = os.path.abspath(path_to_scan)
116
120
  if path_to_exclude:
117
- abs_path_to_scan = os.path.abspath(path_to_scan)
118
121
  for path in path_to_exclude:
119
122
  if os.path.isabs(path):
120
123
  exclude_path = os.path.relpath(path, abs_path_to_scan)
@@ -156,6 +159,19 @@ def run_scan(
156
159
  else:
157
160
  total_files_to_excluded.append(exclude_path_normalized)
158
161
 
162
+ for root, _, files in os.walk(path_to_scan):
163
+ for name in files:
164
+ full_path = os.path.join(root, name)
165
+ try:
166
+ if not check_binary(full_path, True):
167
+ continue
168
+ except Exception:
169
+ continue
170
+ rel_path = os.path.relpath(full_path, abs_path_to_scan)
171
+ rel_norm = os.path.normpath(rel_path).replace("\\", "/")
172
+ excluded_files.append(rel_norm)
173
+ logger.debug(f"Excluded binary from scancode: {rel_norm}")
174
+
159
175
  if excluded_files:
160
176
  total_files_to_excluded.extend(f"**/{file_path}" for file_path in excluded_files)
161
177
 
@@ -176,7 +192,8 @@ def run_scan(
176
192
  "url": True,
177
193
  "timeout": time_out,
178
194
  "include": (),
179
- "ignore": ignore_tuple
195
+ "ignore": ignore_tuple,
196
+ "quiet": hide_progress
180
197
  }
181
198
 
182
199
  _apply_scancode_unset_workaround(kwargs)
@@ -206,7 +223,7 @@ def run_scan(
206
223
  for scan_item in result_list:
207
224
  if os.path.isdir(scan_item.source_name_or_path):
208
225
  continue
209
- if check_binary(os.path.join(path_to_scan, scan_item.source_name_or_path)):
226
+ if check_binary(os.path.join(path_to_scan, scan_item.source_name_or_path), True):
210
227
  scan_item.exclude = True
211
228
  except Exception as ex:
212
229
  success = False
@@ -31,7 +31,7 @@ def get_scanoss_extra_info(scanned_result: dict) -> list:
31
31
  def run_scanoss_py(path_to_scan: str, output_path: str = "", format: list = [],
32
32
  called_by_cli: bool = False, num_threads: int = -1,
33
33
  path_to_exclude: list = [], excluded_files: set = None,
34
- write_json_file: bool = False) -> Tuple[list, bool]:
34
+ write_json_file: bool = False, hide_progress: bool = False) -> Tuple[list, bool]:
35
35
  """
36
36
  Run scanoss.py for the given path.
37
37
 
@@ -58,6 +58,7 @@ def run_scanoss_py(path_to_scan: str, output_path: str = "", format: list = [],
58
58
  os.remove(output_json_file)
59
59
 
60
60
  try:
61
+ logger.debug(f"|---Running SCANOSS on {path_to_scan}")
61
62
  scanoss_settings = ScanossSettings()
62
63
  scanner = Scanner(
63
64
  ignore_cert_errors=True,
@@ -67,7 +68,6 @@ def run_scanoss_py(path_to_scan: str, output_path: str = "", format: list = [],
67
68
  nb_threads=num_threads if num_threads > 0 else 10,
68
69
  scanoss_settings=scanoss_settings
69
70
  )
70
-
71
71
  output_buffer = io.StringIO()
72
72
  with contextlib.redirect_stdout(output_buffer), contextlib.redirect_stderr(output_buffer):
73
73
  scanner.scan_folder_with_options(scan_dir=path_to_scan)
@@ -75,6 +75,7 @@ def run_scanoss_py(path_to_scan: str, output_path: str = "", format: list = [],
75
75
  api_limit_exceed = "due to service limits being exceeded" in captured_output
76
76
 
77
77
  if os.path.isfile(output_json_file):
78
+ logger.debug("|---SCANOSS Parsing")
78
79
  with open(output_json_file, "r") as st_json:
79
80
  st_python = json.load(st_json)
80
81
  for key_to_exclude in excluded_files:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fosslight_source
3
- Version: 2.2.13
3
+ Version: 2.2.15
4
4
  Summary: FOSSLight Source Scanner
5
5
  Author: LG Electronics
6
6
  License-Expression: Apache-2.0
@@ -24,11 +24,12 @@ Requires-Dist: fosslight_util>=2.1.37
24
24
  Requires-Dist: PyYAML
25
25
  Requires-Dist: wheel>=0.38.1
26
26
  Requires-Dist: intbitset
27
- Requires-Dist: fosslight_binary>=5.0.0
27
+ Requires-Dist: fosslight_binary>=5.1.22
28
28
  Requires-Dist: scancode-toolkit>=32.0.2
29
29
  Requires-Dist: fingerprints==1.2.3
30
30
  Requires-Dist: normality==2.6.1
31
31
  Requires-Dist: psycopg2-binary>=2.9.10; python_version >= "3.13"
32
+ Requires-Dist: tqdm
32
33
  Dynamic: license-file
33
34
 
34
35
  <!--
@@ -6,10 +6,11 @@ fosslight_util>=2.1.37
6
6
  PyYAML
7
7
  wheel>=0.38.1
8
8
  intbitset
9
- fosslight_binary>=5.0.0
9
+ fosslight_binary>=5.1.22
10
10
  scancode-toolkit>=32.0.2
11
11
  fingerprints==1.2.3
12
12
  normality==2.6.1
13
+ tqdm
13
14
 
14
15
  [:python_version >= "3.13"]
15
16
  psycopg2-binary>=2.9.10