fosslight-source 2.2.14__tar.gz → 2.2.16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fosslight_source-2.2.14/src/fosslight_source.egg-info → fosslight_source-2.2.16}/PKG-INFO +2 -1
- {fosslight_source-2.2.14 → fosslight_source-2.2.16}/pyproject.toml +2 -1
- {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/_help.py +2 -0
- {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/_scan_item.py +33 -6
- {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/cli.py +44 -14
- {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/run_manifest_extractor.py +112 -0
- {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/run_scancode.py +8 -3
- {fosslight_source-2.2.14 → fosslight_source-2.2.16/src/fosslight_source.egg-info}/PKG-INFO +2 -1
- {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source.egg-info/requires.txt +3 -0
- {fosslight_source-2.2.14 → fosslight_source-2.2.16}/LICENSE +0 -0
- {fosslight_source-2.2.14 → fosslight_source-2.2.16}/MANIFEST.in +0 -0
- {fosslight_source-2.2.14 → fosslight_source-2.2.16}/README.md +0 -0
- {fosslight_source-2.2.14 → fosslight_source-2.2.16}/setup.cfg +0 -0
- {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/__init__.py +0 -0
- {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/_license_matched.py +0 -0
- {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/_parsing_scancode_file_item.py +0 -0
- {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/_parsing_scanoss_file.py +0 -0
- {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/run_scanoss.py +0 -0
- {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/run_spdx_extractor.py +0 -0
- {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source.egg-info/SOURCES.txt +0 -0
- {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source.egg-info/dependency_links.txt +0 -0
- {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source.egg-info/entry_points.txt +0 -0
- {fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source.egg-info/top_level.txt +0 -0
- {fosslight_source-2.2.14 → fosslight_source-2.2.16}/tests/test_tox.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fosslight_source
|
|
3
|
-
Version: 2.2.14
|
|
3
|
+
Version: 2.2.16
|
|
4
4
|
Summary: FOSSLight Source Scanner
|
|
5
5
|
Author: LG Electronics
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -29,6 +29,7 @@ Requires-Dist: scancode-toolkit>=32.0.2
|
|
|
29
29
|
Requires-Dist: fingerprints==1.2.3
|
|
30
30
|
Requires-Dist: normality==2.6.1
|
|
31
31
|
Requires-Dist: psycopg2-binary>=2.9.10; python_version >= "3.13"
|
|
32
|
+
Requires-Dist: tomli; python_version < "3.11"
|
|
32
33
|
Requires-Dist: tqdm
|
|
33
34
|
Dynamic: license-file
|
|
34
35
|
|
|
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
|
|
|
7
7
|
|
|
8
8
|
[project]
|
|
9
9
|
name = "fosslight_source"
|
|
10
|
-
version = "2.2.14"
|
|
10
|
+
version = "2.2.16"
|
|
11
11
|
description = "FOSSLight Source Scanner"
|
|
12
12
|
readme = "README.md"
|
|
13
13
|
license = "Apache-2.0"
|
|
@@ -39,6 +39,7 @@ dependencies = [
|
|
|
39
39
|
"normality==2.6.1",
|
|
40
40
|
# Python 3.13+ needs psycopg2-binary 2.9.10+ (has wheels; 2.9.9 builds fail with _PyInterpreterState_Get)
|
|
41
41
|
"psycopg2-binary>=2.9.10; python_version >= '3.13'",
|
|
42
|
+
"tomli; python_version < '3.11'",
|
|
42
43
|
"tqdm",
|
|
43
44
|
]
|
|
44
45
|
|
|
@@ -42,6 +42,8 @@ _HELP_MESSAGE_SOURCE_SCANNER = f"""
|
|
|
42
42
|
--no_correction Skip OSS information correction with sbom-info.yaml
|
|
43
43
|
--correct_fpath <path> Path to custom sbom-info.yaml file
|
|
44
44
|
--hide_progress Hide the progress bar during scanning
|
|
45
|
+
--kb_url <url> KB API URL (priority: parameter > KB_URL env > default)
|
|
46
|
+
--kb_token <token> KB bearer token (priority: parameter > KB_TOKEN env)
|
|
45
47
|
|
|
46
48
|
💡 Examples
|
|
47
49
|
────────────────────────────────────────────────────────────────────
|
|
@@ -19,11 +19,30 @@ replace_word = ["-only", "-old-style", "-or-later", "licenseref-scancode-", "lic
|
|
|
19
19
|
_notice_filename = ['licen[cs]e[s]?', 'notice[s]?', 'legal', 'copyright[s]?', 'copying*', 'patent[s]?', 'unlicen[cs]e', 'eula',
|
|
20
20
|
'[a,l]?gpl[-]?[1-3]?[.,-,_]?[0-1]?', 'mit', 'bsd[-]?[0-4]?', 'bsd[-]?[0-4][-]?clause[s]?',
|
|
21
21
|
'apache[-,_]?[1-2]?[.,-,_]?[0-2]?']
|
|
22
|
-
_manifest_filename = [
|
|
22
|
+
_manifest_filename = [
|
|
23
|
+
r'.*\.pom$',
|
|
24
|
+
r'package\.json$',
|
|
25
|
+
r'setup\.py$',
|
|
26
|
+
r'setup\.cfg$',
|
|
27
|
+
r'pyproject\.toml$',
|
|
28
|
+
r'.*\.podspec$',
|
|
29
|
+
r'Cargo\.toml$',
|
|
30
|
+
r'huggingface_hub_metadata\.json$',
|
|
31
|
+
]
|
|
23
32
|
MAX_LICENSE_LENGTH = 200
|
|
24
33
|
MAX_LICENSE_TOTAL_LENGTH = 600
|
|
25
34
|
SUBSTRING_LICENSE_COMMENT = "Maximum character limit (License)"
|
|
26
|
-
|
|
35
|
+
DEFAULT_KB_URL = "http://fosslight-kb.lge.com/"


def resolve_kb_config(kb_url: str = "", kb_token: str = "") -> tuple[str, str]:
    """Resolve the KB API base URL and bearer token.

    Each setting is taken from the first non-blank source, in priority order:
    the explicit argument, then the environment variable (``KB_URL`` /
    ``KB_TOKEN``), then -- for the URL only -- ``DEFAULT_KB_URL``.

    :param kb_url: KB API base URL; empty or whitespace-only means "not given".
    :param kb_token: KB bearer token; empty or whitespace-only means "not given".
    :return: ``(url, token)``; ``url`` is stripped of trailing slashes and then
        given exactly one, ``token`` may be an empty string.
    """
    # Strip *before* testing for emptiness so that a whitespace-only argument
    # falls through to the KB_URL environment variable, the same way the token
    # already does, instead of skipping it and landing on the default.
    url = (kb_url or "").strip()
    if not url:
        url = (os.environ.get("KB_URL") or "").strip()
    if not url:
        url = DEFAULT_KB_URL

    token = (kb_token or "").strip()
    if not token:
        token = (os.environ.get("KB_TOKEN") or "").strip()

    # Callers append endpoint names directly (e.g. f"{kb_url}query"), so the
    # base URL must end with a single slash.
    return f"{url.rstrip('/')}/", token
|
|
27
46
|
|
|
28
47
|
|
|
29
48
|
class SourceItem(FileItem):
|
|
@@ -105,15 +124,21 @@ class SourceItem(FileItem):
|
|
|
105
124
|
logger.debug(f"Failed to compute MD5 for {self.source_name_or_path}: {e}")
|
|
106
125
|
return md5_hex, wfp
|
|
107
126
|
|
|
108
|
-
def _get_origin_url_from_md5_hash(
|
|
127
|
+
def _get_origin_url_from_md5_hash(
|
|
128
|
+
self, md5_hash: str, wfp: str = "", kb_url: str = DEFAULT_KB_URL, kb_token: str = ""
|
|
129
|
+
) -> str:
|
|
109
130
|
"""Return origin_url from KB API."""
|
|
110
131
|
try:
|
|
111
132
|
payload = {"file_hash": md5_hash}
|
|
112
133
|
if wfp and wfp.strip():
|
|
113
134
|
payload["wfp_base64"] = base64.b64encode(wfp.strip().encode("utf-8")).decode("ascii")
|
|
114
|
-
request = urllib.request.Request(
|
|
135
|
+
request = urllib.request.Request(
|
|
136
|
+
f"{kb_url}query", data=json.dumps(payload).encode('utf-8'), method='POST'
|
|
137
|
+
)
|
|
115
138
|
request.add_header('Accept', 'application/json')
|
|
116
139
|
request.add_header('Content-Type', 'application/json')
|
|
140
|
+
if kb_token:
|
|
141
|
+
request.add_header('Authorization', f'Bearer {kb_token}')
|
|
117
142
|
|
|
118
143
|
with urllib.request.urlopen(request, timeout=10) as response:
|
|
119
144
|
data = json.loads(response.read().decode())
|
|
@@ -170,7 +195,9 @@ class SourceItem(FileItem):
|
|
|
170
195
|
logger.debug(f"Failed to extract OSS info from URL {url}: {e}")
|
|
171
196
|
return "", "", ""
|
|
172
197
|
|
|
173
|
-
def set_oss_item(
|
|
198
|
+
def set_oss_item(
|
|
199
|
+
self, path_to_scan: str = "", run_kb: bool = False, kb_url: str = DEFAULT_KB_URL, kb_token: str = ""
|
|
200
|
+
) -> None:
|
|
174
201
|
self.oss_items = []
|
|
175
202
|
if self.download_location:
|
|
176
203
|
for url in self.download_location:
|
|
@@ -183,7 +210,7 @@ class SourceItem(FileItem):
|
|
|
183
210
|
if run_kb and not self.is_license_text:
|
|
184
211
|
md5_hash, wfp = self._get_hash(path_to_scan)
|
|
185
212
|
if md5_hash:
|
|
186
|
-
origin_url = self._get_origin_url_from_md5_hash(md5_hash, wfp)
|
|
213
|
+
origin_url = self._get_origin_url_from_md5_hash(md5_hash, wfp, kb_url, kb_token)
|
|
187
214
|
if origin_url:
|
|
188
215
|
self.kb_origin_url = origin_url
|
|
189
216
|
self.kb_evidence = "exact_match"
|
|
@@ -9,6 +9,7 @@ import platform
|
|
|
9
9
|
import time
|
|
10
10
|
import warnings
|
|
11
11
|
import logging
|
|
12
|
+
import re
|
|
12
13
|
import urllib.request
|
|
13
14
|
import urllib.error
|
|
14
15
|
from datetime import datetime
|
|
@@ -18,6 +19,7 @@ from ._help import print_version, print_help_msg_source_scanner
|
|
|
18
19
|
from ._license_matched import get_license_list_to_print
|
|
19
20
|
from fosslight_util.output_format import check_output_formats_v2, write_output_file
|
|
20
21
|
from fosslight_util.correct import correct_with_yaml
|
|
22
|
+
from fosslight_util.parsing_yaml import SUPPORT_OSS_INFO_FILES
|
|
21
23
|
from .run_scancode import run_scan
|
|
22
24
|
from fosslight_util.exclude import get_excluded_paths
|
|
23
25
|
from .run_scanoss import run_scanoss_py
|
|
@@ -26,7 +28,7 @@ import yaml
|
|
|
26
28
|
import argparse
|
|
27
29
|
from .run_spdx_extractor import get_spdx_downloads
|
|
28
30
|
from .run_manifest_extractor import get_manifest_licenses
|
|
29
|
-
from ._scan_item import SourceItem,
|
|
31
|
+
from ._scan_item import SourceItem, resolve_kb_config
|
|
30
32
|
from fosslight_util.oss_item import ScannerItem
|
|
31
33
|
from typing import Tuple
|
|
32
34
|
from ._scan_item import is_manifest_file
|
|
@@ -43,6 +45,7 @@ MERGED_HEADER = {SRC_SHEET_NAME: ['ID', 'Source Path', 'OSS Name',
|
|
|
43
45
|
KB_REFERENCE_HEADER = ['ID', 'Source Path', 'KB Origin URL', 'Evidence']
|
|
44
46
|
ALL_MODE = 'all'
|
|
45
47
|
SCANNER_TYPE = ['kb', 'scancode', 'scanoss', ALL_MODE]
|
|
48
|
+
OSS_INFO_CORRECTION_COMMENT = "Excluded because it's OSS info correction file"
|
|
46
49
|
|
|
47
50
|
|
|
48
51
|
logger = logging.getLogger(constant.LOGGER_NAME)
|
|
@@ -81,6 +84,8 @@ def main() -> None:
|
|
|
81
84
|
parser.add_argument('--no_correction', action='store_true', required=False)
|
|
82
85
|
parser.add_argument('--correct_fpath', nargs=1, type=str, required=False)
|
|
83
86
|
parser.add_argument('--hide_progress', action='store_true', required=False)
|
|
87
|
+
parser.add_argument('--kb_url', type=str, required=False, default="")
|
|
88
|
+
parser.add_argument('--kb_token', type=str, required=False, default="")
|
|
84
89
|
|
|
85
90
|
args = parser.parse_args()
|
|
86
91
|
|
|
@@ -109,6 +114,8 @@ def main() -> None:
|
|
|
109
114
|
if args.correct_fpath:
|
|
110
115
|
correct_filepath = ''.join(args.correct_fpath)
|
|
111
116
|
hide_progress = args.hide_progress
|
|
117
|
+
kb_url = args.kb_url
|
|
118
|
+
kb_token = args.kb_token
|
|
112
119
|
|
|
113
120
|
time_out = args.timeout
|
|
114
121
|
core = args.cores
|
|
@@ -117,7 +124,8 @@ def main() -> None:
|
|
|
117
124
|
result = []
|
|
118
125
|
result = run_scanners(path_to_scan, output_file_name, write_json_file, core, True,
|
|
119
126
|
print_matched_text, formats, time_out, correct_mode, correct_filepath,
|
|
120
|
-
selected_scanner, path_to_exclude, hide_progress=hide_progress
|
|
127
|
+
selected_scanner, path_to_exclude, hide_progress=hide_progress,
|
|
128
|
+
kb_url=kb_url, kb_token=kb_token)
|
|
121
129
|
|
|
122
130
|
_result_log["Scan Result"] = result[1]
|
|
123
131
|
|
|
@@ -265,10 +273,12 @@ def create_report_file(
|
|
|
265
273
|
return scan_item
|
|
266
274
|
|
|
267
275
|
|
|
268
|
-
def check_kb_server_reachable() -> bool:
|
|
276
|
+
def check_kb_server_reachable(kb_url: str, kb_token: str = "") -> bool:
|
|
269
277
|
for attempt in range(3):
|
|
270
278
|
try:
|
|
271
|
-
request = urllib.request.Request(f"{
|
|
279
|
+
request = urllib.request.Request(f"{kb_url}health", method='GET')
|
|
280
|
+
if kb_token:
|
|
281
|
+
request.add_header('Authorization', f'Bearer {kb_token}')
|
|
272
282
|
with urllib.request.urlopen(request, timeout=10) as response:
|
|
273
283
|
logger.debug(f"KB server is reachable. Response status: {response.status}")
|
|
274
284
|
return True
|
|
@@ -312,10 +322,18 @@ def get_kb_reference_to_print(merged_result: list) -> list:
|
|
|
312
322
|
return data
|
|
313
323
|
|
|
314
324
|
|
|
325
|
+
def mark_oss_info_correction_files_as_excluded(scan_results: list) -> None:
    """Flag OSS info correction files in ``scan_results`` as excluded.

    The lower-cased base name of every item's ``source_name_or_path`` is
    searched against each pattern in ``SUPPORT_OSS_INFO_FILES``
    (case-insensitively). On the first hit the item is modified in place:
    ``exclude`` becomes ``True`` and ``comment`` is set to
    ``OSS_INFO_CORRECTION_COMMENT``.

    :param scan_results: items exposing ``source_name_or_path``, ``exclude``
        and ``comment`` attributes.
    """
    for scanned in scan_results:
        base_name = os.path.basename(scanned.source_name_or_path).lower()
        for oss_info_pattern in SUPPORT_OSS_INFO_FILES:
            if re.search(oss_info_pattern, base_name, re.IGNORECASE):
                scanned.exclude = True
                scanned.comment = OSS_INFO_CORRECTION_COMMENT
                # One matching pattern is enough; move on to the next item.
                break
|
|
331
|
+
|
|
332
|
+
|
|
315
333
|
def merge_results(
|
|
316
334
|
scancode_result: list = [], scanoss_result: list = [], spdx_downloads: dict = {},
|
|
317
335
|
path_to_scan: str = "", run_kb: bool = False, manifest_licenses: dict = {},
|
|
318
|
-
excluded_files: set = None, hide_progress: bool = False
|
|
336
|
+
excluded_files: set = None, hide_progress: bool = False, kb_url: str = "", kb_token: str = ""
|
|
319
337
|
) -> list:
|
|
320
338
|
|
|
321
339
|
"""
|
|
@@ -326,6 +344,8 @@ def merge_results(
|
|
|
326
344
|
:param path_to_scan: path to the scanned directory for constructing absolute file paths.
|
|
327
345
|
:param run_kb: if True, load kb result.
|
|
328
346
|
:param excluded_files: set of relative paths to exclude from KB-only file discovery.
|
|
347
|
+
:param kb_url: KB API base URL.
|
|
348
|
+
:param kb_token: KB API bearer token.
|
|
329
349
|
:return merged_result: list of merged result in SourceItem.
|
|
330
350
|
"""
|
|
331
351
|
if excluded_files is None:
|
|
@@ -346,20 +366,23 @@ def merge_results(
|
|
|
346
366
|
scancode_result.append(new_result_item)
|
|
347
367
|
if manifest_licenses:
|
|
348
368
|
for file_name, licenses in manifest_licenses.items():
|
|
369
|
+
valid_licenses = [lic.strip() for lic in licenses if isinstance(lic, str) and lic.strip()]
|
|
370
|
+
if not valid_licenses:
|
|
371
|
+
continue
|
|
349
372
|
if file_name in scancode_result:
|
|
350
373
|
merged_result_item = scancode_result[scancode_result.index(file_name)]
|
|
351
374
|
# overwrite existing detected licenses with manifest-provided licenses
|
|
352
375
|
merged_result_item.licenses = [] # clear existing licenses (setter clears when value falsy)
|
|
353
|
-
merged_result_item.licenses = licenses
|
|
376
|
+
merged_result_item.licenses = valid_licenses
|
|
354
377
|
merged_result_item.is_manifest_file = True
|
|
355
378
|
else:
|
|
356
379
|
new_result_item = SourceItem(file_name)
|
|
357
|
-
new_result_item.licenses = licenses
|
|
380
|
+
new_result_item.licenses = valid_licenses
|
|
358
381
|
new_result_item.is_manifest_file = True
|
|
359
382
|
scancode_result.append(new_result_item)
|
|
360
383
|
|
|
361
384
|
for item in scancode_result:
|
|
362
|
-
item.set_oss_item(path_to_scan, run_kb)
|
|
385
|
+
item.set_oss_item(path_to_scan, run_kb, kb_url, kb_token)
|
|
363
386
|
|
|
364
387
|
# Add OSSItem for files in path_to_scan that are not in scancode_result
|
|
365
388
|
# when KB returns an origin URL for their MD5 hash (skip excluded_files)
|
|
@@ -378,7 +401,7 @@ def merge_results(
|
|
|
378
401
|
if rel_path in scancode_paths or rel_path in excluded_files:
|
|
379
402
|
continue
|
|
380
403
|
extra_item = SourceItem(rel_path)
|
|
381
|
-
extra_item.set_oss_item(path_to_scan, run_kb)
|
|
404
|
+
extra_item.set_oss_item(path_to_scan, run_kb, kb_url, kb_token)
|
|
382
405
|
if extra_item.download_location:
|
|
383
406
|
scancode_result.append(extra_item)
|
|
384
407
|
scancode_paths.add(rel_path)
|
|
@@ -393,7 +416,7 @@ def run_scanners(
|
|
|
393
416
|
formats: list = [], time_out: int = 120,
|
|
394
417
|
correct_mode: bool = True, correct_filepath: str = "",
|
|
395
418
|
selected_scanner: str = ALL_MODE, path_to_exclude: list = [],
|
|
396
|
-
all_exclude_mode: tuple = (), hide_progress: bool = False
|
|
419
|
+
all_exclude_mode: tuple = (), hide_progress: bool = False, kb_url: str = "", kb_token: str = ""
|
|
397
420
|
) -> Tuple[bool, str, 'ScannerItem', list, list]:
|
|
398
421
|
"""
|
|
399
422
|
Run Scancode and scanoss.py for the given path.
|
|
@@ -405,6 +428,8 @@ def run_scanners(
|
|
|
405
428
|
:param called_by_cli: if not called by cli, initialize logger.
|
|
406
429
|
:param print_matched_text: if requested, output matched text (only for scancode).
|
|
407
430
|
:param format: output format (excel, csv, opossum).
|
|
431
|
+
:param kb_url: KB API base URL. If empty, read KB_URL environment variable, then use default.
|
|
432
|
+
:param kb_token: KB API bearer token. If empty, read KB_TOKEN environment variable.
|
|
408
433
|
:return success: success or failure of scancode.
|
|
409
434
|
:return result_log["Scan Result"]:
|
|
410
435
|
:return merged_result: merged scan result of scancode and scanoss.
|
|
@@ -421,6 +446,7 @@ def run_scanners(
|
|
|
421
446
|
result_log = {}
|
|
422
447
|
scan_item = []
|
|
423
448
|
api_limit_exceed = False
|
|
449
|
+
kb_url, kb_token = resolve_kb_config(kb_url, kb_token)
|
|
424
450
|
|
|
425
451
|
success, msg, output_path, output_files, output_extensions, formats = check_output_formats_v2(output_file_name, formats)
|
|
426
452
|
|
|
@@ -432,6 +458,8 @@ def run_scanners(
|
|
|
432
458
|
logger, result_log = init_log(os.path.join(output_path, f"fosslight_log_src_{start_time}.txt"),
|
|
433
459
|
True, logging.INFO, logging.DEBUG, PKG_NAME, path_to_scan, path_to_exclude)
|
|
434
460
|
|
|
461
|
+
logger.info(f"Tool Info : {result_log['Tool Info']}")
|
|
462
|
+
|
|
435
463
|
if '.xlsx' not in output_extensions and print_matched_text:
|
|
436
464
|
logger.warning("-m option is only available for excel.")
|
|
437
465
|
print_matched_text = False
|
|
@@ -469,15 +497,17 @@ def run_scanners(
|
|
|
469
497
|
if selected_scanner in SCANNER_TYPE:
|
|
470
498
|
run_kb = True if selected_scanner in ['kb', ALL_MODE] else False
|
|
471
499
|
if run_kb:
|
|
472
|
-
if not check_kb_server_reachable():
|
|
500
|
+
if not check_kb_server_reachable(kb_url, kb_token):
|
|
473
501
|
run_kb = False
|
|
474
|
-
run_kb_msg = "KB Unreachable"
|
|
502
|
+
run_kb_msg = f"KB({kb_url}) Unreachable"
|
|
475
503
|
else:
|
|
476
|
-
run_kb_msg = "KB Enabled"
|
|
504
|
+
run_kb_msg = f"KB({kb_url}) Enabled"
|
|
477
505
|
|
|
478
506
|
spdx_downloads, manifest_licenses = metadata_collector(path_to_scan, excluded_files)
|
|
479
507
|
merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads,
|
|
480
|
-
path_to_scan, run_kb, manifest_licenses, excluded_files,
|
|
508
|
+
path_to_scan, run_kb, manifest_licenses, excluded_files,
|
|
509
|
+
hide_progress, kb_url, kb_token)
|
|
510
|
+
mark_oss_info_correction_files_as_excluded(merged_result)
|
|
481
511
|
scan_item = create_report_file(start_time, merged_result, license_list, scanoss_result, selected_scanner,
|
|
482
512
|
print_matched_text, output_path, output_files, output_extensions, correct_mode,
|
|
483
513
|
correct_filepath, path_to_scan, excluded_path_without_dot, formats,
|
{fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/run_manifest_extractor.py
RENAMED
|
@@ -125,6 +125,63 @@ def get_licenses_from_setup_py(file_path: str) -> list[str]:
|
|
|
125
125
|
return _split_spdx_expression(value)
|
|
126
126
|
|
|
127
127
|
|
|
128
|
+
# Body of the [project] table: everything up to the next line that opens a table, or end of file.
_PYPROJECT_PROJECT_TABLE_RE = re.compile(r'^\s*\[project\]\s*(.*?)(?=^\s*\[|\Z)', flags=re.MULTILINE | re.DOTALL)
# license = "<value>" (any TOML quoting style).
_PYPROJECT_LICENSE_STRING_RE = re.compile(r'^\s*license\s*=\s*(?P<q>"""|\'\'\'|"|\')(?P<val>.*?)(?P=q)',
                                          flags=re.MULTILINE | re.DOTALL)
# license = { ... text = "<value>" ... }
_PYPROJECT_LICENSE_TEXT_RE = re.compile(
    r'^\s*license\s*=\s*\{[^}]*?\btext\s*=\s*(?P<q>"""|\'\'\'|"|\')(?P<val>.*?)(?P=q)',
    flags=re.MULTILINE | re.DOTALL)


def _load_pyproject_toml(file_path: str):
    """Return the parsed TOML document, or None when no parser is installed or the file cannot be parsed."""
    try:
        import tomllib as toml_loader  # Python 3.11+
    except ImportError:
        try:
            import tomli as toml_loader  # Backport for older interpreters
        except ImportError:
            return None
    try:
        with open(file_path, 'rb') as f:
            return toml_loader.load(f)
    except (OSError, ValueError) as ex:  # TOML decode errors derive from ValueError
        logger.debug(f"Failed to parse pyproject.toml via toml parser for {file_path}: {ex}")
        return None


def _licenses_from_parsed_pyproject(data: dict) -> list[str]:
    """Extract the license declared in the ``[project]`` table of a parsed pyproject document."""
    project_tbl = data.get('project')
    if not isinstance(project_tbl, dict):
        return []
    license_value = project_tbl.get('license')
    if isinstance(license_value, dict):
        # Table form: only `text` carries a license; `file` merely points at a license file.
        license_value = license_value.get('text')
    if isinstance(license_value, str) and license_value.strip():
        return [license_value.strip()]
    return []


def _licenses_from_pyproject_text(file_path: str) -> list[str]:
    """Regex-based extraction of ``[project].license``, used only when the file could not be TOML-parsed."""
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError) as ex:  # UnicodeDecodeError derives from ValueError
        logger.info(f"Failed to parse pyproject.toml {file_path}: {ex}")
        return []

    project_match = _PYPROJECT_PROJECT_TABLE_RE.search(content)
    if not project_match:
        return []
    block = project_match.group(1)
    # Try the plain-string form first, then the `{text = ...}` table form.
    for pattern in (_PYPROJECT_LICENSE_STRING_RE, _PYPROJECT_LICENSE_TEXT_RE):
        found = pattern.search(block)
        if found:
            val = found.group('val').strip()
            if val:
                return [val]
    # Nothing usable (including the `{file = ...}` form, which names no license).
    return []


def get_licenses_from_pyproject_toml(file_path: str) -> list[str]:
    """Return the license declared in the ``[project]`` table of a pyproject.toml file.

    The file is parsed with ``tomllib`` (or the ``tomli`` backport). When the
    parse succeeds its result is authoritative: a string ``license`` or a
    ``license = {text = ...}`` table yields a one-element list, anything else
    (no license, ``{file = ...}``, blank values) yields ``[]``. The regex scan
    is used only when no parser is available or the file cannot be parsed, so
    text that merely looks like ``license = "..."`` inside a multi-line string
    is not reported for a file that parsed cleanly.

    :param file_path: path to the pyproject.toml file.
    :return: list with the stripped license value, or an empty list.
    """
    data = _load_pyproject_toml(file_path)
    if isinstance(data, dict):
        return _licenses_from_parsed_pyproject(data)
    return _licenses_from_pyproject_text(file_path)
|
|
183
|
+
|
|
184
|
+
|
|
128
185
|
def get_licenses_from_podspec(file_path: str) -> list[str]:
|
|
129
186
|
try:
|
|
130
187
|
with open(file_path, 'r', encoding='utf-8') as f:
|
|
@@ -207,6 +264,49 @@ def get_licenses_from_cargo_toml(file_path: str) -> list[str]:
|
|
|
207
264
|
return []
|
|
208
265
|
|
|
209
266
|
|
|
267
|
+
def get_licenses_from_huggingface_metadata(file_path: str) -> list[str]:
    """Collect license identifiers from a huggingface_hub_metadata.json file.

    Values are gathered, in this order, from the top-level ``license`` key,
    from ``cardData.license`` / ``cardData.licenses``, and from ``tags``
    entries of the form ``license:<id>``. Strings and (nested) lists of
    strings are accepted; other value types are ignored.

    Duplicates are dropped case-insensitively, keeping the first spelling
    seen, so a license reported both as ``Apache-2.0`` and as the tag
    ``license:apache-2.0`` appears once.

    :param file_path: path to the metadata JSON file.
    :return: de-duplicated license identifiers in discovery order; empty when
        the file cannot be read or parsed, or its top level is not an object.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as ex:  # JSON and Unicode decode errors derive from ValueError
        logger.info(f"Failed to read huggingface_hub_metadata.json {file_path}: {ex}")
        return []

    if not isinstance(data, dict):
        return []

    licenses: list[str] = []
    seen: set[str] = set()  # case-folded identifiers already collected

    def append_license(value):
        if isinstance(value, str):
            token = value.strip()
            key = token.casefold()
            if token and key not in seen:
                seen.add(key)
                licenses.append(token)
        elif isinstance(value, list):
            for item in value:
                append_license(item)

    # Hugging Face model API commonly returns top-level `license`
    append_license(data.get('license'))

    # Some metadata may include cardData/license variants
    card_data = data.get('cardData')
    if isinstance(card_data, dict):
        append_license(card_data.get('license'))
        append_license(card_data.get('licenses'))

    # Many Hub API responses expose license only via tags, e.g. "license:apache-2.0".
    tags = data.get('tags')
    if isinstance(tags, list):
        prefix = 'license:'
        for tag in tags:
            if isinstance(tag, str) and tag.lower().startswith(prefix):
                append_license(tag[len(prefix):])

    return licenses
|
|
308
|
+
|
|
309
|
+
|
|
210
310
|
def get_manifest_licenses(file_path: str) -> list[str]:
|
|
211
311
|
if file_path.endswith('.pom'):
|
|
212
312
|
try:
|
|
@@ -235,6 +335,12 @@ def get_manifest_licenses(file_path: str) -> list[str]:
|
|
|
235
335
|
except Exception as ex:
|
|
236
336
|
logger.info(f"Failed to extract license from setup.py {file_path}: {ex}")
|
|
237
337
|
return []
|
|
338
|
+
elif os.path.basename(file_path).lower() == 'pyproject.toml':
|
|
339
|
+
try:
|
|
340
|
+
return get_licenses_from_pyproject_toml(file_path)
|
|
341
|
+
except Exception as ex:
|
|
342
|
+
logger.info(f"Failed to extract license from pyproject.toml {file_path}: {ex}")
|
|
343
|
+
return []
|
|
238
344
|
elif os.path.basename(file_path).lower().endswith('.podspec'):
|
|
239
345
|
try:
|
|
240
346
|
return get_licenses_from_podspec(file_path)
|
|
@@ -247,3 +353,9 @@ def get_manifest_licenses(file_path: str) -> list[str]:
|
|
|
247
353
|
except Exception as ex:
|
|
248
354
|
logger.info(f"Failed to extract license from Cargo.toml {file_path}: {ex}")
|
|
249
355
|
return []
|
|
356
|
+
elif os.path.basename(file_path).lower() == 'huggingface_hub_metadata.json':
|
|
357
|
+
try:
|
|
358
|
+
return get_licenses_from_huggingface_metadata(file_path)
|
|
359
|
+
except Exception as ex:
|
|
360
|
+
logger.info(f"Failed to extract license from huggingface_hub_metadata.json {file_path}: {ex}")
|
|
361
|
+
return []
|
|
@@ -103,6 +103,9 @@ def run_scan(
|
|
|
103
103
|
if not called_by_cli:
|
|
104
104
|
logger, _result_log = init_log(os.path.join(output_path, f"fosslight_log_src_{_start_time}.txt"),
|
|
105
105
|
True, logging.INFO, logging.DEBUG, _PKG_NAME, path_to_scan, path_to_exclude)
|
|
106
|
+
|
|
107
|
+
logger.info(f"Tool Info : {_result_log['Tool Info']}")
|
|
108
|
+
|
|
106
109
|
num_cores = multiprocessing.cpu_count() - 1 if num_cores < 0 else num_cores
|
|
107
110
|
|
|
108
111
|
if os.path.isdir(path_to_scan):
|
|
@@ -113,6 +116,7 @@ def run_scan(
|
|
|
113
116
|
pretty_params["path_to_exclude"] = path_to_exclude
|
|
114
117
|
pretty_params["output_file"] = output_file_name
|
|
115
118
|
total_files_to_excluded = []
|
|
119
|
+
binary_files_to_exclude = []
|
|
116
120
|
abs_path_to_scan = os.path.abspath(path_to_scan)
|
|
117
121
|
if path_to_exclude:
|
|
118
122
|
for path in path_to_exclude:
|
|
@@ -166,11 +170,12 @@ def run_scan(
|
|
|
166
170
|
continue
|
|
167
171
|
rel_path = os.path.relpath(full_path, abs_path_to_scan)
|
|
168
172
|
rel_norm = os.path.normpath(rel_path).replace("\\", "/")
|
|
169
|
-
|
|
173
|
+
binary_files_to_exclude.append(rel_norm)
|
|
170
174
|
logger.debug(f"Excluded binary from scancode: {rel_norm}")
|
|
171
175
|
|
|
172
|
-
|
|
173
|
-
|
|
176
|
+
all_excluded_for_scancode = list(excluded_files) + binary_files_to_exclude
|
|
177
|
+
if all_excluded_for_scancode:
|
|
178
|
+
total_files_to_excluded.extend(f"**/{file_path}" for file_path in all_excluded_for_scancode)
|
|
174
179
|
|
|
175
180
|
total_files_to_excluded = sorted(list(set(total_files_to_excluded)))
|
|
176
181
|
ignore_tuple = tuple(total_files_to_excluded)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fosslight_source
|
|
3
|
-
Version: 2.2.14
|
|
3
|
+
Version: 2.2.16
|
|
4
4
|
Summary: FOSSLight Source Scanner
|
|
5
5
|
Author: LG Electronics
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -29,6 +29,7 @@ Requires-Dist: scancode-toolkit>=32.0.2
|
|
|
29
29
|
Requires-Dist: fingerprints==1.2.3
|
|
30
30
|
Requires-Dist: normality==2.6.1
|
|
31
31
|
Requires-Dist: psycopg2-binary>=2.9.10; python_version >= "3.13"
|
|
32
|
+
Requires-Dist: tomli; python_version < "3.11"
|
|
32
33
|
Requires-Dist: tqdm
|
|
33
34
|
Dynamic: license-file
|
|
34
35
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/_license_matched.py
RENAMED
|
File without changes
|
|
File without changes
|
{fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/_parsing_scanoss_file.py
RENAMED
|
File without changes
|
|
File without changes
|
{fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source/run_spdx_extractor.py
RENAMED
|
File without changes
|
{fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{fosslight_source-2.2.14 → fosslight_source-2.2.16}/src/fosslight_source.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|