fosslight-source 2.2.14__tar.gz → 2.2.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {fosslight_source-2.2.14/src/fosslight_source.egg-info → fosslight_source-2.2.16}/PKG-INFO +2 -1
  2. {fosslight_source-2.2.14 → fosslight_source-2.2.16}/pyproject.toml +2 -1
  3. {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/_help.py +2 -0
  4. {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/_scan_item.py +33 -6
  5. {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/cli.py +44 -14
  6. {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/run_manifest_extractor.py +112 -0
  7. {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/run_scancode.py +8 -3
  8. {fosslight_source-2.2.14 → fosslight_source-2.2.16/src/fosslight_source.egg-info}/PKG-INFO +2 -1
  9. {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source.egg-info/requires.txt +3 -0
  10. {fosslight_source-2.2.14 → fosslight_source-2.2.16}/LICENSE +0 -0
  11. {fosslight_source-2.2.14 → fosslight_source-2.2.16}/MANIFEST.in +0 -0
  12. {fosslight_source-2.2.14 → fosslight_source-2.2.16}/README.md +0 -0
  13. {fosslight_source-2.2.14 → fosslight_source-2.2.16}/setup.cfg +0 -0
  14. {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/__init__.py +0 -0
  15. {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/_license_matched.py +0 -0
  16. {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/_parsing_scancode_file_item.py +0 -0
  17. {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/_parsing_scanoss_file.py +0 -0
  18. {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/run_scanoss.py +0 -0
  19. {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/run_spdx_extractor.py +0 -0
  20. {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source.egg-info/SOURCES.txt +0 -0
  21. {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source.egg-info/dependency_links.txt +0 -0
  22. {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source.egg-info/entry_points.txt +0 -0
  23. {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source.egg-info/top_level.txt +0 -0
  24. {fosslight_source-2.2.14 → fosslight_source-2.2.16}/tests/test_tox.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fosslight_source
3
- Version: 2.2.14
3
+ Version: 2.2.16
4
4
  Summary: FOSSLight Source Scanner
5
5
  Author: LG Electronics
6
6
  License-Expression: Apache-2.0
@@ -29,6 +29,7 @@ Requires-Dist: scancode-toolkit>=32.0.2
29
29
  Requires-Dist: fingerprints==1.2.3
30
30
  Requires-Dist: normality==2.6.1
31
31
  Requires-Dist: psycopg2-binary>=2.9.10; python_version >= "3.13"
32
+ Requires-Dist: tomli; python_version < "3.11"
32
33
  Requires-Dist: tqdm
33
34
  Dynamic: license-file
34
35
 
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
7
7
 
8
8
  [project]
9
9
  name = "fosslight_source"
10
- version = "2.2.14"
10
+ version = "2.2.16"
11
11
  description = "FOSSLight Source Scanner"
12
12
  readme = "README.md"
13
13
  license = "Apache-2.0"
@@ -39,6 +39,7 @@ dependencies = [
39
39
  "normality==2.6.1",
40
40
  # Python 3.13+ needs psycopg2-binary 2.9.10+ (has wheels; 2.9.9 builds fail with _PyInterpreterState_Get)
41
41
  "psycopg2-binary>=2.9.10; python_version >= '3.13'",
42
+ "tomli; python_version < '3.11'",
42
43
  "tqdm",
43
44
  ]
44
45
 
@@ -42,6 +42,8 @@ _HELP_MESSAGE_SOURCE_SCANNER = f"""
42
42
  --no_correction Skip OSS information correction with sbom-info.yaml
43
43
  --correct_fpath <path> Path to custom sbom-info.yaml file
44
44
  --hide_progress Hide the progress bar during scanning
45
+ --kb_url <url> KB API URL (priority: parameter > KB_URL env > default)
46
+ --kb_token <token> KB bearer token (priority: parameter > KB_TOKEN env)
45
47
 
46
48
  💡 Examples
47
49
  ────────────────────────────────────────────────────────────────────
@@ -19,11 +19,30 @@ replace_word = ["-only", "-old-style", "-or-later", "licenseref-scancode-", "lic
19
19
  _notice_filename = ['licen[cs]e[s]?', 'notice[s]?', 'legal', 'copyright[s]?', 'copying*', 'patent[s]?', 'unlicen[cs]e', 'eula',
20
20
  '[a,l]?gpl[-]?[1-3]?[.,-,_]?[0-1]?', 'mit', 'bsd[-]?[0-4]?', 'bsd[-]?[0-4][-]?clause[s]?',
21
21
  'apache[-,_]?[1-2]?[.,-,_]?[0-2]?']
22
- _manifest_filename = [r'.*\.pom$', r'package\.json$', r'setup\.py$', r'setup\.cfg$', r'.*\.podspec$', r'Cargo\.toml$']
22
+ _manifest_filename = [
23
+ r'.*\.pom$',
24
+ r'package\.json$',
25
+ r'setup\.py$',
26
+ r'setup\.cfg$',
27
+ r'pyproject\.toml$',
28
+ r'.*\.podspec$',
29
+ r'Cargo\.toml$',
30
+ r'huggingface_hub_metadata\.json$',
31
+ ]
23
32
  MAX_LICENSE_LENGTH = 200
24
33
  MAX_LICENSE_TOTAL_LENGTH = 600
25
34
  SUBSTRING_LICENSE_COMMENT = "Maximum character limit (License)"
26
- KB_URL = "http://fosslight-kb.lge.com/"
35
+ DEFAULT_KB_URL = "http://fosslight-kb.lge.com/"
36
+
37
+
38
+ def resolve_kb_config(kb_url: str = "", kb_token: str = "") -> tuple[str, str]:
39
+ url = (kb_url or os.environ.get("KB_URL", DEFAULT_KB_URL)).strip() or DEFAULT_KB_URL
40
+
41
+ token = (kb_token or "").strip()
42
+ if not token:
43
+ token = (os.environ.get("KB_TOKEN") or "").strip()
44
+
45
+ return f"{url.rstrip('/')}/", token
27
46
 
28
47
 
29
48
  class SourceItem(FileItem):
@@ -105,15 +124,21 @@ class SourceItem(FileItem):
105
124
  logger.debug(f"Failed to compute MD5 for {self.source_name_or_path}: {e}")
106
125
  return md5_hex, wfp
107
126
 
108
- def _get_origin_url_from_md5_hash(self, md5_hash: str, wfp: str = "") -> str:
127
+ def _get_origin_url_from_md5_hash(
128
+ self, md5_hash: str, wfp: str = "", kb_url: str = DEFAULT_KB_URL, kb_token: str = ""
129
+ ) -> str:
109
130
  """Return origin_url from KB API."""
110
131
  try:
111
132
  payload = {"file_hash": md5_hash}
112
133
  if wfp and wfp.strip():
113
134
  payload["wfp_base64"] = base64.b64encode(wfp.strip().encode("utf-8")).decode("ascii")
114
- request = urllib.request.Request(f"{KB_URL}query", data=json.dumps(payload).encode('utf-8'), method='POST')
135
+ request = urllib.request.Request(
136
+ f"{kb_url}query", data=json.dumps(payload).encode('utf-8'), method='POST'
137
+ )
115
138
  request.add_header('Accept', 'application/json')
116
139
  request.add_header('Content-Type', 'application/json')
140
+ if kb_token:
141
+ request.add_header('Authorization', f'Bearer {kb_token}')
117
142
 
118
143
  with urllib.request.urlopen(request, timeout=10) as response:
119
144
  data = json.loads(response.read().decode())
@@ -170,7 +195,9 @@ class SourceItem(FileItem):
170
195
  logger.debug(f"Failed to extract OSS info from URL {url}: {e}")
171
196
  return "", "", ""
172
197
 
173
- def set_oss_item(self, path_to_scan: str = "", run_kb: bool = False) -> None:
198
+ def set_oss_item(
199
+ self, path_to_scan: str = "", run_kb: bool = False, kb_url: str = DEFAULT_KB_URL, kb_token: str = ""
200
+ ) -> None:
174
201
  self.oss_items = []
175
202
  if self.download_location:
176
203
  for url in self.download_location:
@@ -183,7 +210,7 @@ class SourceItem(FileItem):
183
210
  if run_kb and not self.is_license_text:
184
211
  md5_hash, wfp = self._get_hash(path_to_scan)
185
212
  if md5_hash:
186
- origin_url = self._get_origin_url_from_md5_hash(md5_hash, wfp)
213
+ origin_url = self._get_origin_url_from_md5_hash(md5_hash, wfp, kb_url, kb_token)
187
214
  if origin_url:
188
215
  self.kb_origin_url = origin_url
189
216
  self.kb_evidence = "exact_match"
@@ -9,6 +9,7 @@ import platform
9
9
  import time
10
10
  import warnings
11
11
  import logging
12
+ import re
12
13
  import urllib.request
13
14
  import urllib.error
14
15
  from datetime import datetime
@@ -18,6 +19,7 @@ from ._help import print_version, print_help_msg_source_scanner
18
19
  from ._license_matched import get_license_list_to_print
19
20
  from fosslight_util.output_format import check_output_formats_v2, write_output_file
20
21
  from fosslight_util.correct import correct_with_yaml
22
+ from fosslight_util.parsing_yaml import SUPPORT_OSS_INFO_FILES
21
23
  from .run_scancode import run_scan
22
24
  from fosslight_util.exclude import get_excluded_paths
23
25
  from .run_scanoss import run_scanoss_py
@@ -26,7 +28,7 @@ import yaml
26
28
  import argparse
27
29
  from .run_spdx_extractor import get_spdx_downloads
28
30
  from .run_manifest_extractor import get_manifest_licenses
29
- from ._scan_item import SourceItem, KB_URL
31
+ from ._scan_item import SourceItem, resolve_kb_config
30
32
  from fosslight_util.oss_item import ScannerItem
31
33
  from typing import Tuple
32
34
  from ._scan_item import is_manifest_file
@@ -43,6 +45,7 @@ MERGED_HEADER = {SRC_SHEET_NAME: ['ID', 'Source Path', 'OSS Name',
43
45
  KB_REFERENCE_HEADER = ['ID', 'Source Path', 'KB Origin URL', 'Evidence']
44
46
  ALL_MODE = 'all'
45
47
  SCANNER_TYPE = ['kb', 'scancode', 'scanoss', ALL_MODE]
48
+ OSS_INFO_CORRECTION_COMMENT = "Excluded because it's OSS info correction file"
46
49
 
47
50
 
48
51
  logger = logging.getLogger(constant.LOGGER_NAME)
@@ -81,6 +84,8 @@ def main() -> None:
81
84
  parser.add_argument('--no_correction', action='store_true', required=False)
82
85
  parser.add_argument('--correct_fpath', nargs=1, type=str, required=False)
83
86
  parser.add_argument('--hide_progress', action='store_true', required=False)
87
+ parser.add_argument('--kb_url', type=str, required=False, default="")
88
+ parser.add_argument('--kb_token', type=str, required=False, default="")
84
89
 
85
90
  args = parser.parse_args()
86
91
 
@@ -109,6 +114,8 @@ def main() -> None:
109
114
  if args.correct_fpath:
110
115
  correct_filepath = ''.join(args.correct_fpath)
111
116
  hide_progress = args.hide_progress
117
+ kb_url = args.kb_url
118
+ kb_token = args.kb_token
112
119
 
113
120
  time_out = args.timeout
114
121
  core = args.cores
@@ -117,7 +124,8 @@ def main() -> None:
117
124
  result = []
118
125
  result = run_scanners(path_to_scan, output_file_name, write_json_file, core, True,
119
126
  print_matched_text, formats, time_out, correct_mode, correct_filepath,
120
- selected_scanner, path_to_exclude, hide_progress=hide_progress)
127
+ selected_scanner, path_to_exclude, hide_progress=hide_progress,
128
+ kb_url=kb_url, kb_token=kb_token)
121
129
 
122
130
  _result_log["Scan Result"] = result[1]
123
131
 
@@ -265,10 +273,12 @@ def create_report_file(
265
273
  return scan_item
266
274
 
267
275
 
268
- def check_kb_server_reachable() -> bool:
276
+ def check_kb_server_reachable(kb_url: str, kb_token: str = "") -> bool:
269
277
  for attempt in range(3):
270
278
  try:
271
- request = urllib.request.Request(f"{KB_URL}health", method='GET')
279
+ request = urllib.request.Request(f"{kb_url}health", method='GET')
280
+ if kb_token:
281
+ request.add_header('Authorization', f'Bearer {kb_token}')
272
282
  with urllib.request.urlopen(request, timeout=10) as response:
273
283
  logger.debug(f"KB server is reachable. Response status: {response.status}")
274
284
  return True
@@ -312,10 +322,18 @@ def get_kb_reference_to_print(merged_result: list) -> list:
312
322
  return data
313
323
 
314
324
 
325
+ def mark_oss_info_correction_files_as_excluded(scan_results: list) -> None:
326
+ for item in scan_results:
327
+ file_name = os.path.basename(item.source_name_or_path).lower()
328
+ if any(re.search(pattern, file_name, re.IGNORECASE) for pattern in SUPPORT_OSS_INFO_FILES):
329
+ item.exclude = True
330
+ item.comment = OSS_INFO_CORRECTION_COMMENT
331
+
332
+
315
333
  def merge_results(
316
334
  scancode_result: list = [], scanoss_result: list = [], spdx_downloads: dict = {},
317
335
  path_to_scan: str = "", run_kb: bool = False, manifest_licenses: dict = {},
318
- excluded_files: set = None, hide_progress: bool = False
336
+ excluded_files: set = None, hide_progress: bool = False, kb_url: str = "", kb_token: str = ""
319
337
  ) -> list:
320
338
 
321
339
  """
@@ -326,6 +344,8 @@ def merge_results(
326
344
  :param path_to_scan: path to the scanned directory for constructing absolute file paths.
327
345
  :param run_kb: if True, load kb result.
328
346
  :param excluded_files: set of relative paths to exclude from KB-only file discovery.
347
+ :param kb_url: KB API base URL.
348
+ :param kb_token: KB API bearer token.
329
349
  :return merged_result: list of merged result in SourceItem.
330
350
  """
331
351
  if excluded_files is None:
@@ -346,20 +366,23 @@ def merge_results(
346
366
  scancode_result.append(new_result_item)
347
367
  if manifest_licenses:
348
368
  for file_name, licenses in manifest_licenses.items():
369
+ valid_licenses = [lic.strip() for lic in licenses if isinstance(lic, str) and lic.strip()]
370
+ if not valid_licenses:
371
+ continue
349
372
  if file_name in scancode_result:
350
373
  merged_result_item = scancode_result[scancode_result.index(file_name)]
351
374
  # overwrite existing detected licenses with manifest-provided licenses
352
375
  merged_result_item.licenses = [] # clear existing licenses (setter clears when value falsy)
353
- merged_result_item.licenses = licenses
376
+ merged_result_item.licenses = valid_licenses
354
377
  merged_result_item.is_manifest_file = True
355
378
  else:
356
379
  new_result_item = SourceItem(file_name)
357
- new_result_item.licenses = licenses
380
+ new_result_item.licenses = valid_licenses
358
381
  new_result_item.is_manifest_file = True
359
382
  scancode_result.append(new_result_item)
360
383
 
361
384
  for item in scancode_result:
362
- item.set_oss_item(path_to_scan, run_kb)
385
+ item.set_oss_item(path_to_scan, run_kb, kb_url, kb_token)
363
386
 
364
387
  # Add OSSItem for files in path_to_scan that are not in scancode_result
365
388
  # when KB returns an origin URL for their MD5 hash (skip excluded_files)
@@ -378,7 +401,7 @@ def merge_results(
378
401
  if rel_path in scancode_paths or rel_path in excluded_files:
379
402
  continue
380
403
  extra_item = SourceItem(rel_path)
381
- extra_item.set_oss_item(path_to_scan, run_kb)
404
+ extra_item.set_oss_item(path_to_scan, run_kb, kb_url, kb_token)
382
405
  if extra_item.download_location:
383
406
  scancode_result.append(extra_item)
384
407
  scancode_paths.add(rel_path)
@@ -393,7 +416,7 @@ def run_scanners(
393
416
  formats: list = [], time_out: int = 120,
394
417
  correct_mode: bool = True, correct_filepath: str = "",
395
418
  selected_scanner: str = ALL_MODE, path_to_exclude: list = [],
396
- all_exclude_mode: tuple = (), hide_progress: bool = False
419
+ all_exclude_mode: tuple = (), hide_progress: bool = False, kb_url: str = "", kb_token: str = ""
397
420
  ) -> Tuple[bool, str, 'ScannerItem', list, list]:
398
421
  """
399
422
  Run Scancode and scanoss.py for the given path.
@@ -405,6 +428,8 @@ def run_scanners(
405
428
  :param called_by_cli: if not called by cli, initialize logger.
406
429
  :param print_matched_text: if requested, output matched text (only for scancode).
407
430
  :param format: output format (excel, csv, opossum).
431
+ :param kb_url: KB API base URL. If empty, read KB_URL environment variable, then use default.
432
+ :param kb_token: KB API bearer token. If empty, read KB_TOKEN environment variable.
408
433
  :return success: success or failure of scancode.
409
434
  :return result_log["Scan Result"]:
410
435
  :return merged_result: merged scan result of scancode and scanoss.
@@ -421,6 +446,7 @@ def run_scanners(
421
446
  result_log = {}
422
447
  scan_item = []
423
448
  api_limit_exceed = False
449
+ kb_url, kb_token = resolve_kb_config(kb_url, kb_token)
424
450
 
425
451
  success, msg, output_path, output_files, output_extensions, formats = check_output_formats_v2(output_file_name, formats)
426
452
 
@@ -432,6 +458,8 @@ def run_scanners(
432
458
  logger, result_log = init_log(os.path.join(output_path, f"fosslight_log_src_{start_time}.txt"),
433
459
  True, logging.INFO, logging.DEBUG, PKG_NAME, path_to_scan, path_to_exclude)
434
460
 
461
+ logger.info(f"Tool Info : {result_log['Tool Info']}")
462
+
435
463
  if '.xlsx' not in output_extensions and print_matched_text:
436
464
  logger.warning("-m option is only available for excel.")
437
465
  print_matched_text = False
@@ -469,15 +497,17 @@ def run_scanners(
469
497
  if selected_scanner in SCANNER_TYPE:
470
498
  run_kb = True if selected_scanner in ['kb', ALL_MODE] else False
471
499
  if run_kb:
472
- if not check_kb_server_reachable():
500
+ if not check_kb_server_reachable(kb_url, kb_token):
473
501
  run_kb = False
474
- run_kb_msg = "KB Unreachable"
502
+ run_kb_msg = f"KB({kb_url}) Unreachable"
475
503
  else:
476
- run_kb_msg = "KB Enabled"
504
+ run_kb_msg = f"KB({kb_url}) Enabled"
477
505
 
478
506
  spdx_downloads, manifest_licenses = metadata_collector(path_to_scan, excluded_files)
479
507
  merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads,
480
- path_to_scan, run_kb, manifest_licenses, excluded_files, hide_progress)
508
+ path_to_scan, run_kb, manifest_licenses, excluded_files,
509
+ hide_progress, kb_url, kb_token)
510
+ mark_oss_info_correction_files_as_excluded(merged_result)
481
511
  scan_item = create_report_file(start_time, merged_result, license_list, scanoss_result, selected_scanner,
482
512
  print_matched_text, output_path, output_files, output_extensions, correct_mode,
483
513
  correct_filepath, path_to_scan, excluded_path_without_dot, formats,
@@ -125,6 +125,63 @@ def get_licenses_from_setup_py(file_path: str) -> list[str]:
125
125
  return _split_spdx_expression(value)
126
126
 
127
127
 
128
+ def get_licenses_from_pyproject_toml(file_path: str) -> list[str]:
129
+ try:
130
+ data = None
131
+ try:
132
+ import tomllib as toml_loader # Python 3.11+
133
+ with open(file_path, 'rb') as f:
134
+ data = toml_loader.load(f)
135
+ except Exception:
136
+ try:
137
+ import tomli as toml_loader # Backport
138
+ with open(file_path, 'rb') as f:
139
+ data = toml_loader.load(f)
140
+ except Exception:
141
+ data = None
142
+
143
+ if isinstance(data, dict):
144
+ project_tbl = data.get('project') or {}
145
+ license_value = project_tbl.get('license')
146
+ if isinstance(license_value, str) and license_value.strip():
147
+ return [license_value.strip()]
148
+ if isinstance(license_value, dict):
149
+ text_value = license_value.get('text')
150
+ if isinstance(text_value, str) and text_value.strip():
151
+ return [text_value.strip()]
152
+ if license_value.get('file'):
153
+ return []
154
+ except Exception as ex:
155
+ logger.info(f"Failed to parse pyproject.toml via toml parser for {file_path}: {ex}")
156
+
157
+ try:
158
+ with open(file_path, 'r', encoding='utf-8') as f:
159
+ content = f.read()
160
+ project_match = re.search(r'^\s*\[project\]\s*(.*?)(?=^\s*\[|\Z)', content, flags=re.MULTILINE | re.DOTALL)
161
+ if not project_match:
162
+ return []
163
+ block = project_match.group(1)
164
+ m = re.search(r'^\s*license\s*=\s*(?P<q>"""|\'\'\'|"|\')(?P<val>.*?)(?P=q)', block,
165
+ flags=re.MULTILINE | re.DOTALL)
166
+ if m:
167
+ val = m.group('val').strip()
168
+ if val:
169
+ return [val]
170
+ m2 = re.search(r'^\s*license\s*=\s*\{[^}]*?\btext\s*=\s*(?P<q>"""|\'\'\'|"|\')(?P<val>.*?)(?P=q)',
171
+ block, flags=re.MULTILINE | re.DOTALL)
172
+ if m2:
173
+ val = m2.group('val').strip()
174
+ if val:
175
+ return [val]
176
+ m3 = re.search(r'^\s*license\s*=\s*\{[^}]*?\bfile\s*=', block, flags=re.MULTILINE | re.DOTALL)
177
+ if m3:
178
+ return []
179
+ except Exception as ex:
180
+ logger.info(f"Failed to parse pyproject.toml {file_path}: {ex}")
181
+ return []
182
+ return []
183
+
184
+
128
185
  def get_licenses_from_podspec(file_path: str) -> list[str]:
129
186
  try:
130
187
  with open(file_path, 'r', encoding='utf-8') as f:
@@ -207,6 +264,49 @@ def get_licenses_from_cargo_toml(file_path: str) -> list[str]:
207
264
  return []
208
265
 
209
266
 
267
+ def get_licenses_from_huggingface_metadata(file_path: str) -> list[str]:
268
+ try:
269
+ with open(file_path, 'r', encoding='utf-8') as f:
270
+ data = json.load(f)
271
+ except Exception as ex:
272
+ logger.info(f"Failed to read huggingface_hub_metadata.json {file_path}: {ex}")
273
+ return []
274
+
275
+ if not isinstance(data, dict):
276
+ return []
277
+
278
+ licenses: list[str] = []
279
+
280
+ def append_license(value):
281
+ if isinstance(value, str):
282
+ token = value.strip()
283
+ if token and token not in licenses:
284
+ licenses.append(token)
285
+ elif isinstance(value, list):
286
+ for item in value:
287
+ append_license(item)
288
+
289
+ # Hugging Face model API commonly returns top-level `license`
290
+ append_license(data.get('license'))
291
+
292
+ # Some metadata may include cardData/license variants
293
+ card_data = data.get('cardData')
294
+ if isinstance(card_data, dict):
295
+ append_license(card_data.get('license'))
296
+ append_license(card_data.get('licenses'))
297
+
298
+ # Many Hub API responses expose license only via tags, e.g. "license:apache-2.0".
299
+ tags = data.get('tags')
300
+ if isinstance(tags, list):
301
+ for tag in tags:
302
+ if isinstance(tag, str):
303
+ prefix = 'license:'
304
+ if tag.lower().startswith(prefix):
305
+ append_license(tag[len(prefix):].strip())
306
+
307
+ return licenses
308
+
309
+
210
310
  def get_manifest_licenses(file_path: str) -> list[str]:
211
311
  if file_path.endswith('.pom'):
212
312
  try:
@@ -235,6 +335,12 @@ def get_manifest_licenses(file_path: str) -> list[str]:
235
335
  except Exception as ex:
236
336
  logger.info(f"Failed to extract license from setup.py {file_path}: {ex}")
237
337
  return []
338
+ elif os.path.basename(file_path).lower() == 'pyproject.toml':
339
+ try:
340
+ return get_licenses_from_pyproject_toml(file_path)
341
+ except Exception as ex:
342
+ logger.info(f"Failed to extract license from pyproject.toml {file_path}: {ex}")
343
+ return []
238
344
  elif os.path.basename(file_path).lower().endswith('.podspec'):
239
345
  try:
240
346
  return get_licenses_from_podspec(file_path)
@@ -247,3 +353,9 @@ def get_manifest_licenses(file_path: str) -> list[str]:
247
353
  except Exception as ex:
248
354
  logger.info(f"Failed to extract license from Cargo.toml {file_path}: {ex}")
249
355
  return []
356
+ elif os.path.basename(file_path).lower() == 'huggingface_hub_metadata.json':
357
+ try:
358
+ return get_licenses_from_huggingface_metadata(file_path)
359
+ except Exception as ex:
360
+ logger.info(f"Failed to extract license from huggingface_hub_metadata.json {file_path}: {ex}")
361
+ return []
@@ -103,6 +103,9 @@ def run_scan(
103
103
  if not called_by_cli:
104
104
  logger, _result_log = init_log(os.path.join(output_path, f"fosslight_log_src_{_start_time}.txt"),
105
105
  True, logging.INFO, logging.DEBUG, _PKG_NAME, path_to_scan, path_to_exclude)
106
+
107
+ logger.info(f"Tool Info : {_result_log['Tool Info']}")
108
+
106
109
  num_cores = multiprocessing.cpu_count() - 1 if num_cores < 0 else num_cores
107
110
 
108
111
  if os.path.isdir(path_to_scan):
@@ -113,6 +116,7 @@ def run_scan(
113
116
  pretty_params["path_to_exclude"] = path_to_exclude
114
117
  pretty_params["output_file"] = output_file_name
115
118
  total_files_to_excluded = []
119
+ binary_files_to_exclude = []
116
120
  abs_path_to_scan = os.path.abspath(path_to_scan)
117
121
  if path_to_exclude:
118
122
  for path in path_to_exclude:
@@ -166,11 +170,12 @@ def run_scan(
166
170
  continue
167
171
  rel_path = os.path.relpath(full_path, abs_path_to_scan)
168
172
  rel_norm = os.path.normpath(rel_path).replace("\\", "/")
169
- excluded_files.append(rel_norm)
173
+ binary_files_to_exclude.append(rel_norm)
170
174
  logger.debug(f"Excluded binary from scancode: {rel_norm}")
171
175
 
172
- if excluded_files:
173
- total_files_to_excluded.extend(f"**/{file_path}" for file_path in excluded_files)
176
+ all_excluded_for_scancode = list(excluded_files) + binary_files_to_exclude
177
+ if all_excluded_for_scancode:
178
+ total_files_to_excluded.extend(f"**/{file_path}" for file_path in all_excluded_for_scancode)
174
179
 
175
180
  total_files_to_excluded = sorted(list(set(total_files_to_excluded)))
176
181
  ignore_tuple = tuple(total_files_to_excluded)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fosslight_source
3
- Version: 2.2.14
3
+ Version: 2.2.16
4
4
  Summary: FOSSLight Source Scanner
5
5
  Author: LG Electronics
6
6
  License-Expression: Apache-2.0
@@ -29,6 +29,7 @@ Requires-Dist: scancode-toolkit>=32.0.2
29
29
  Requires-Dist: fingerprints==1.2.3
30
30
  Requires-Dist: normality==2.6.1
31
31
  Requires-Dist: psycopg2-binary>=2.9.10; python_version >= "3.13"
32
+ Requires-Dist: tomli; python_version < "3.11"
32
33
  Requires-Dist: tqdm
33
34
  Dynamic: license-file
34
35
 
@@ -12,5 +12,8 @@ fingerprints==1.2.3
12
12
  normality==2.6.1
13
13
  tqdm
14
14
 
15
+ [:python_version < "3.11"]
16
+ tomli
17
+
15
18
  [:python_version >= "3.13"]
16
19
  psycopg2-binary>=2.9.10