fosslight-source 2.2.1__tar.gz → 2.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fosslight_source-2.2.1/src/fosslight_source.egg-info → fosslight_source-2.2.2}/PKG-INFO +1 -1
- {fosslight_source-2.2.1 → fosslight_source-2.2.2}/setup.py +1 -1
- {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source/_parsing_scancode_file_item.py +2 -62
- {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source/_scan_item.py +1 -1
- {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source/cli.py +53 -3
- fosslight_source-2.2.2/src/fosslight_source/run_manifest_extractor.py +251 -0
- fosslight_source-2.2.2/src/fosslight_source/run_spdx_extractor.py +26 -0
- {fosslight_source-2.2.1 → fosslight_source-2.2.2/src/fosslight_source.egg-info}/PKG-INFO +1 -1
- {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source.egg-info/SOURCES.txt +1 -0
- fosslight_source-2.2.1/src/fosslight_source/run_spdx_extractor.py +0 -37
- {fosslight_source-2.2.1 → fosslight_source-2.2.2}/LICENSE +0 -0
- {fosslight_source-2.2.1 → fosslight_source-2.2.2}/MANIFEST.in +0 -0
- {fosslight_source-2.2.1 → fosslight_source-2.2.2}/README.md +0 -0
- {fosslight_source-2.2.1 → fosslight_source-2.2.2}/requirements.txt +0 -0
- {fosslight_source-2.2.1 → fosslight_source-2.2.2}/setup.cfg +0 -0
- {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source/__init__.py +0 -0
- {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source/_help.py +0 -0
- {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source/_license_matched.py +0 -0
- {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source/_parsing_scanoss_file.py +0 -0
- {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source/run_scancode.py +0 -0
- {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source/run_scanoss.py +0 -0
- {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source.egg-info/dependency_links.txt +0 -0
- {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source.egg-info/entry_points.txt +0 -0
- {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source.egg-info/requires.txt +0 -0
- {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source.egg-info/top_level.txt +0 -0
- {fosslight_source-2.2.1 → fosslight_source-2.2.2}/tests/test_tox.py +0 -0
|
@@ -14,7 +14,7 @@ with open('requirements.txt', 'r', 'utf-8') as f:
|
|
|
14
14
|
if __name__ == "__main__":
|
|
15
15
|
setup(
|
|
16
16
|
name='fosslight_source',
|
|
17
|
-
version='2.2.1',
|
|
17
|
+
version='2.2.2',
|
|
18
18
|
package_dir={"": "src"},
|
|
19
19
|
packages=find_packages(where='src'),
|
|
20
20
|
description='FOSSLight Source Scanner',
|
|
@@ -7,12 +7,10 @@ import os
|
|
|
7
7
|
import logging
|
|
8
8
|
import re
|
|
9
9
|
import fosslight_util.constant as constant
|
|
10
|
-
from fosslight_util.get_pom_license import get_license_from_pom
|
|
11
10
|
from ._license_matched import MatchedLicense
|
|
12
11
|
from ._scan_item import SourceItem
|
|
13
12
|
from ._scan_item import replace_word
|
|
14
13
|
from ._scan_item import is_notice_file
|
|
15
|
-
from ._scan_item import is_manifest_file
|
|
16
14
|
from typing import Tuple
|
|
17
15
|
|
|
18
16
|
logger = logging.getLogger(constant.LOGGER_NAME)
|
|
@@ -181,35 +179,6 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
|
|
|
181
179
|
if len(license_detected) > 0:
|
|
182
180
|
result_item.licenses = license_detected
|
|
183
181
|
|
|
184
|
-
detected_without_pom = []
|
|
185
|
-
if is_manifest_file(file_path) and len(license_detected) > 0:
|
|
186
|
-
result_item.is_manifest_file = True
|
|
187
|
-
if file_path.endswith('.pom'):
|
|
188
|
-
try:
|
|
189
|
-
pom_licenses = get_license_from_pom(pom_path=file_path, check_parent=False)
|
|
190
|
-
normalize_pom_licenses = []
|
|
191
|
-
if pom_licenses:
|
|
192
|
-
pom_license_list = pom_licenses.split(', ')
|
|
193
|
-
for pom_license in pom_license_list:
|
|
194
|
-
if pom_license not in license_detected:
|
|
195
|
-
for lic_matched_key, lic_info in license_list.items():
|
|
196
|
-
if hasattr(lic_info, 'matched_text') and lic_info.matched_text:
|
|
197
|
-
matched_txt = str(lic_info.matched_text).replace(',', '')
|
|
198
|
-
if pom_license in matched_txt:
|
|
199
|
-
normalize_pom_licenses.append(lic_info.license)
|
|
200
|
-
break
|
|
201
|
-
else:
|
|
202
|
-
normalize_pom_licenses.append(pom_license)
|
|
203
|
-
detected_without_pom = list(set(license_detected) - set(normalize_pom_licenses))
|
|
204
|
-
if detected_without_pom:
|
|
205
|
-
result_item.comment = f"Detected: {', '.join(detected_without_pom)}"
|
|
206
|
-
result_item.licenses = []
|
|
207
|
-
result_item.licenses = normalize_pom_licenses
|
|
208
|
-
if not normalize_pom_licenses:
|
|
209
|
-
result_item.exclude = True
|
|
210
|
-
except Exception as ex:
|
|
211
|
-
logger.info(f"Failed to extract license from POM {file_path}: {ex}")
|
|
212
|
-
|
|
213
182
|
# Remove copyright info for license text file of GPL family
|
|
214
183
|
if should_remove_copyright_for_gpl_license_text(license_detected, result_item.is_license_text):
|
|
215
184
|
logger.debug(f"Removing copyright for GPL family license text file: {file_path}")
|
|
@@ -217,7 +186,7 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
|
|
|
217
186
|
else:
|
|
218
187
|
result_item.copyright = copyright_value_list
|
|
219
188
|
|
|
220
|
-
if len(license_expression_list) > 0
|
|
189
|
+
if len(license_expression_list) > 0:
|
|
221
190
|
license_expression_list = list(
|
|
222
191
|
set(license_expression_list))
|
|
223
192
|
result_item.comment = ','.join(license_expression_list)
|
|
@@ -314,35 +283,6 @@ def parsing_scancode_32_later(
|
|
|
314
283
|
file.get("percentage_of_license_text", 0) > 90 and not is_source_file
|
|
315
284
|
)
|
|
316
285
|
|
|
317
|
-
detected_without_pom = []
|
|
318
|
-
if is_manifest_file(file_path) and len(license_detected) > 0:
|
|
319
|
-
result_item.is_manifest_file = True
|
|
320
|
-
if file_path.endswith('.pom'):
|
|
321
|
-
try:
|
|
322
|
-
pom_licenses = get_license_from_pom(pom_path=file_path, check_parent=False)
|
|
323
|
-
normalize_pom_licenses = []
|
|
324
|
-
if pom_licenses:
|
|
325
|
-
pom_license_list = pom_licenses.split(', ')
|
|
326
|
-
for pom_license in pom_license_list:
|
|
327
|
-
if pom_license not in license_detected:
|
|
328
|
-
for lic_matched_key, lic_info in license_list.items():
|
|
329
|
-
if hasattr(lic_info, 'matched_text') and lic_info.matched_text:
|
|
330
|
-
matched_txt = str(lic_info.matched_text).replace(',', '')
|
|
331
|
-
if pom_license in matched_txt:
|
|
332
|
-
normalize_pom_licenses.append(lic_info.license)
|
|
333
|
-
break
|
|
334
|
-
else:
|
|
335
|
-
normalize_pom_licenses.append(pom_license)
|
|
336
|
-
detected_without_pom = list(set(license_detected) - set(normalize_pom_licenses))
|
|
337
|
-
if detected_without_pom:
|
|
338
|
-
result_item.comment = f"Detected: {', '.join(detected_without_pom)}"
|
|
339
|
-
result_item.licenses = []
|
|
340
|
-
result_item.licenses = normalize_pom_licenses
|
|
341
|
-
if not normalize_pom_licenses:
|
|
342
|
-
result_item.exclude = True
|
|
343
|
-
except Exception as ex:
|
|
344
|
-
logger.info(f"Failed to extract license from POM {file_path}: {ex}")
|
|
345
|
-
|
|
346
286
|
# Remove copyright info for license text file of GPL family
|
|
347
287
|
if should_remove_copyright_for_gpl_license_text(license_detected, result_item.is_license_text):
|
|
348
288
|
logger.debug(f"Removing copyright for GPL family license text file: {file_path}")
|
|
@@ -350,7 +290,7 @@ def parsing_scancode_32_later(
|
|
|
350
290
|
else:
|
|
351
291
|
result_item.copyright = copyright_value_list
|
|
352
292
|
|
|
353
|
-
if len(license_detected) > 1
|
|
293
|
+
if len(license_detected) > 1:
|
|
354
294
|
license_expression_spdx = file.get("detected_license_expression_spdx", "")
|
|
355
295
|
license_expression = file.get("detected_license_expression", "")
|
|
356
296
|
if license_expression_spdx:
|
|
@@ -18,7 +18,7 @@ replace_word = ["-only", "-old-style", "-or-later", "licenseref-scancode-", "lic
|
|
|
18
18
|
_notice_filename = ['licen[cs]e[s]?', 'notice[s]?', 'legal', 'copyright[s]?', 'copying*', 'patent[s]?', 'unlicen[cs]e', 'eula',
|
|
19
19
|
'[a,l]?gpl[-]?[1-3]?[.,-,_]?[0-1]?', 'mit', 'bsd[-]?[0-4]?', 'bsd[-]?[0-4][-]?clause[s]?',
|
|
20
20
|
'apache[-,_]?[1-2]?[.,-,_]?[0-2]?']
|
|
21
|
-
_manifest_filename = [r'.*\.pom$', r'package\.json$', r'setup\.py$', r'
|
|
21
|
+
_manifest_filename = [r'.*\.pom$', r'package\.json$', r'setup\.py$', r'setup\.cfg$', r'.*\.podspec$', r'Cargo\.toml$']
|
|
22
22
|
MAX_LICENSE_LENGTH = 200
|
|
23
23
|
MAX_LICENSE_TOTAL_LENGTH = 600
|
|
24
24
|
SUBSTRING_LICENSE_COMMENT = "Maximum character limit (License)"
|
|
@@ -25,9 +25,12 @@ from .run_scanoss import get_scanoss_extra_info
|
|
|
25
25
|
import yaml
|
|
26
26
|
import argparse
|
|
27
27
|
from .run_spdx_extractor import get_spdx_downloads
|
|
28
|
+
from .run_manifest_extractor import get_manifest_licenses
|
|
28
29
|
from ._scan_item import SourceItem, KB_URL
|
|
29
30
|
from fosslight_util.oss_item import ScannerItem
|
|
30
31
|
from typing import Tuple
|
|
32
|
+
from ._scan_item import is_manifest_file
|
|
33
|
+
|
|
31
34
|
|
|
32
35
|
SRC_SHEET_NAME = 'SRC_FL_Source'
|
|
33
36
|
SCANOSS_HEADER = {SRC_SHEET_NAME: ['ID', 'Source Path', 'OSS Name',
|
|
@@ -265,7 +268,7 @@ def check_kb_server_reachable() -> bool:
|
|
|
265
268
|
|
|
266
269
|
def merge_results(
|
|
267
270
|
scancode_result: list = [], scanoss_result: list = [], spdx_downloads: dict = {},
|
|
268
|
-
path_to_scan: str = "", run_kb: bool = False
|
|
271
|
+
path_to_scan: str = "", run_kb: bool = False, manifest_licenses: dict = {}
|
|
269
272
|
) -> list:
|
|
270
273
|
|
|
271
274
|
"""
|
|
@@ -291,6 +294,19 @@ def merge_results(
|
|
|
291
294
|
new_result_item = SourceItem(file_name)
|
|
292
295
|
new_result_item.download_location = download_location
|
|
293
296
|
scancode_result.append(new_result_item)
|
|
297
|
+
if manifest_licenses:
|
|
298
|
+
for file_name, licenses in manifest_licenses.items():
|
|
299
|
+
if file_name in scancode_result:
|
|
300
|
+
merged_result_item = scancode_result[scancode_result.index(file_name)]
|
|
301
|
+
# overwrite existing detected licenses with manifest-provided licenses
|
|
302
|
+
merged_result_item.licenses = [] # clear existing licenses (setter clears when value falsy)
|
|
303
|
+
merged_result_item.licenses = licenses
|
|
304
|
+
merged_result_item.is_manifest_file = True
|
|
305
|
+
else:
|
|
306
|
+
new_result_item = SourceItem(file_name)
|
|
307
|
+
new_result_item.licenses = licenses
|
|
308
|
+
new_result_item.is_manifest_file = True
|
|
309
|
+
scancode_result.append(new_result_item)
|
|
294
310
|
if run_kb and not check_kb_server_reachable():
|
|
295
311
|
run_kb = False
|
|
296
312
|
if run_kb:
|
|
@@ -369,8 +385,9 @@ def run_scanners(
|
|
|
369
385
|
num_cores, excluded_path_with_default_exclusion, excluded_files)
|
|
370
386
|
if selected_scanner in SCANNER_TYPE:
|
|
371
387
|
run_kb = True if selected_scanner in ['kb', 'all'] else False
|
|
372
|
-
spdx_downloads = get_spdx_downloads(path_to_scan, excluded_files)
|
|
373
|
-
merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads, path_to_scan, run_kb)
|
|
388
|
+
spdx_downloads, manifest_licenses = metadata_collector(path_to_scan, excluded_files)
|
|
389
|
+
merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads,
|
|
390
|
+
path_to_scan, run_kb, manifest_licenses)
|
|
374
391
|
scan_item = create_report_file(start_time, merged_result, license_list, scanoss_result, selected_scanner,
|
|
375
392
|
print_matched_text, output_path, output_files, output_extensions, correct_mode,
|
|
376
393
|
correct_filepath, path_to_scan, excluded_path_without_dot, formats,
|
|
@@ -385,5 +402,38 @@ def run_scanners(
|
|
|
385
402
|
return success, result_log.get(RESULT_KEY, ""), scan_item, license_list, scanoss_result
|
|
386
403
|
|
|
387
404
|
|
|
405
|
+
def metadata_collector(path_to_scan: str, excluded_files: set) -> dict:
|
|
406
|
+
"""
|
|
407
|
+
Collect metadata for merging.
|
|
408
|
+
|
|
409
|
+
- Traverse files with exclusions applied
|
|
410
|
+
- spdx_downloads: {rel_path: [download_urls]}
|
|
411
|
+
- manifest_licenses: {rel_path: [license_names]}
|
|
412
|
+
|
|
413
|
+
:return: (spdx_downloads, manifest_licenses)
|
|
414
|
+
"""
|
|
415
|
+
abs_path_to_scan = os.path.abspath(path_to_scan)
|
|
416
|
+
spdx_downloads = {}
|
|
417
|
+
manifest_licenses = {}
|
|
418
|
+
|
|
419
|
+
for root, dirs, files in os.walk(path_to_scan):
|
|
420
|
+
for file in files:
|
|
421
|
+
file_path = os.path.join(root, file)
|
|
422
|
+
rel_path_file = os.path.relpath(file_path, abs_path_to_scan).replace('\\', '/')
|
|
423
|
+
if rel_path_file in excluded_files:
|
|
424
|
+
continue
|
|
425
|
+
|
|
426
|
+
downloads = get_spdx_downloads(file_path)
|
|
427
|
+
if downloads:
|
|
428
|
+
spdx_downloads[rel_path_file] = downloads
|
|
429
|
+
|
|
430
|
+
if is_manifest_file(file_path):
|
|
431
|
+
licenses = get_manifest_licenses(file_path)
|
|
432
|
+
if licenses:
|
|
433
|
+
manifest_licenses[rel_path_file] = licenses
|
|
434
|
+
|
|
435
|
+
return spdx_downloads, manifest_licenses
|
|
436
|
+
|
|
437
|
+
|
|
388
438
|
if __name__ == '__main__':
|
|
389
439
|
main()
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Copyright (c) 2025 LG Electronics Inc.
|
|
4
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
5
|
+
import os
|
|
6
|
+
import json
|
|
7
|
+
import re
|
|
8
|
+
import logging
|
|
9
|
+
from fosslight_util.get_pom_license import get_license_from_pom
|
|
10
|
+
import fosslight_util.constant as constant
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(constant.LOGGER_NAME)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _split_spdx_expression(value: str) -> list[str]:
|
|
16
|
+
parts = re.split(r'\s+(?:OR|AND)\s+|[|]{2}|&&', value, flags=re.IGNORECASE)
|
|
17
|
+
tokens: list[str] = []
|
|
18
|
+
for part in parts:
|
|
19
|
+
token = part.strip().strip('()')
|
|
20
|
+
if token:
|
|
21
|
+
tokens.append(token)
|
|
22
|
+
unique: list[str] = []
|
|
23
|
+
for t in tokens:
|
|
24
|
+
if t not in unique:
|
|
25
|
+
unique.append(t)
|
|
26
|
+
return unique
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def get_licenses_from_package_json(file_path: str) -> list[str]:
|
|
30
|
+
try:
|
|
31
|
+
with open(file_path, 'r', encoding='utf-8') as f:
|
|
32
|
+
data = json.load(f)
|
|
33
|
+
except Exception as ex:
|
|
34
|
+
logger.info(f"Failed to read package.json {file_path}: {ex}")
|
|
35
|
+
return []
|
|
36
|
+
|
|
37
|
+
if not isinstance(data, dict):
|
|
38
|
+
return []
|
|
39
|
+
|
|
40
|
+
licenses: list[str] = []
|
|
41
|
+
license_field = data.get('license')
|
|
42
|
+
|
|
43
|
+
if isinstance(license_field, str):
|
|
44
|
+
value = license_field.strip()
|
|
45
|
+
if value.upper() == 'UNLICENSED':
|
|
46
|
+
return []
|
|
47
|
+
if value.upper().startswith('SEE LICENSE IN'):
|
|
48
|
+
return []
|
|
49
|
+
licenses.extend(_split_spdx_expression(value))
|
|
50
|
+
elif isinstance(license_field, dict):
|
|
51
|
+
type_val = license_field.get('type')
|
|
52
|
+
if isinstance(type_val, str):
|
|
53
|
+
type_val = type_val.strip()
|
|
54
|
+
if type_val and type_val.upper() != 'UNLICENSED':
|
|
55
|
+
licenses.append(type_val)
|
|
56
|
+
|
|
57
|
+
if not licenses:
|
|
58
|
+
legacy = data.get('licenses')
|
|
59
|
+
if isinstance(legacy, list):
|
|
60
|
+
for item in legacy:
|
|
61
|
+
if isinstance(item, str):
|
|
62
|
+
token = item.strip()
|
|
63
|
+
if token and token.upper() != 'UNLICENSED':
|
|
64
|
+
licenses.append(token)
|
|
65
|
+
elif isinstance(item, dict):
|
|
66
|
+
t = item.get('type')
|
|
67
|
+
if isinstance(t, str):
|
|
68
|
+
t = t.strip()
|
|
69
|
+
if t and t.upper() != 'UNLICENSED':
|
|
70
|
+
licenses.append(t)
|
|
71
|
+
|
|
72
|
+
unique: list[str] = []
|
|
73
|
+
for lic in licenses:
|
|
74
|
+
if lic not in unique:
|
|
75
|
+
unique.append(lic)
|
|
76
|
+
return unique
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def get_licenses_from_setup_cfg(file_path: str) -> list[str]:
|
|
80
|
+
try:
|
|
81
|
+
import configparser
|
|
82
|
+
parser = configparser.ConfigParser()
|
|
83
|
+
parser.read(file_path, encoding='utf-8')
|
|
84
|
+
if parser.has_section('metadata'):
|
|
85
|
+
license_value = parser.get('metadata', 'license', fallback='').strip()
|
|
86
|
+
if license_value:
|
|
87
|
+
return _split_spdx_expression(license_value)
|
|
88
|
+
except Exception as ex:
|
|
89
|
+
logger.info(f"Failed to parse setup.cfg with configparser for {file_path}: {ex}")
|
|
90
|
+
|
|
91
|
+
try:
|
|
92
|
+
with open(file_path, 'r', encoding='utf-8') as f:
|
|
93
|
+
content = f.read()
|
|
94
|
+
meta_match = re.search(r'^\s*\[metadata\]\s*(.*?)(?=^\s*\[|\Z)', content, flags=re.MULTILINE | re.DOTALL)
|
|
95
|
+
if not meta_match:
|
|
96
|
+
return []
|
|
97
|
+
block = meta_match.group(1)
|
|
98
|
+
m = re.search(r'^\s*license\s*=\s*(.+)$', block, flags=re.MULTILINE)
|
|
99
|
+
if not m:
|
|
100
|
+
return []
|
|
101
|
+
val = m.group(1).strip()
|
|
102
|
+
if (len(val) >= 2) and ((val[0] == val[-1]) and val[0] in ('"', "'")):
|
|
103
|
+
val = val[1:-1].strip()
|
|
104
|
+
if not val:
|
|
105
|
+
return []
|
|
106
|
+
return _split_spdx_expression(val)
|
|
107
|
+
except Exception as ex:
|
|
108
|
+
logger.info(f"Failed to parse setup.cfg {file_path} via regex fallback: {ex}")
|
|
109
|
+
return []
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def get_licenses_from_setup_py(file_path: str) -> list[str]:
|
|
113
|
+
try:
|
|
114
|
+
with open(file_path, 'r', encoding='utf-8') as f:
|
|
115
|
+
content = f.read()
|
|
116
|
+
except Exception as ex:
|
|
117
|
+
logger.info(f"Failed to read setup.py {file_path}: {ex}")
|
|
118
|
+
return []
|
|
119
|
+
|
|
120
|
+
match = re.search(r'license\s*=\s*([\'"]{1,3})(.+?)\1', content, flags=re.IGNORECASE | re.DOTALL)
|
|
121
|
+
if not match:
|
|
122
|
+
return []
|
|
123
|
+
value = match.group(2).strip()
|
|
124
|
+
if not value:
|
|
125
|
+
return []
|
|
126
|
+
|
|
127
|
+
return _split_spdx_expression(value)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def get_licenses_from_podspec(file_path: str) -> list[str]:
|
|
131
|
+
try:
|
|
132
|
+
with open(file_path, 'r', encoding='utf-8') as f:
|
|
133
|
+
content = f.read()
|
|
134
|
+
except Exception as ex:
|
|
135
|
+
logger.info(f"Failed to read podspec {file_path}: {ex}")
|
|
136
|
+
return []
|
|
137
|
+
|
|
138
|
+
m = re.search(r'\blicense\s*=\s*([\'"])(.+?)\1', content, flags=re.IGNORECASE)
|
|
139
|
+
if m:
|
|
140
|
+
value = m.group(2).strip()
|
|
141
|
+
if value:
|
|
142
|
+
return _split_spdx_expression(value)
|
|
143
|
+
|
|
144
|
+
m = re.search(r'\blicense\s*=\s*\{[^}]*?:type\s*=>\s*([\'"])(.+?)\1', content, flags=re.IGNORECASE | re.DOTALL)
|
|
145
|
+
if m:
|
|
146
|
+
value = m.group(2).strip()
|
|
147
|
+
if value:
|
|
148
|
+
return _split_spdx_expression(value)
|
|
149
|
+
|
|
150
|
+
m = re.search(r'\blicense\s*=\s*\{[^}]*?:type\s*=>\s*:(\w+)', content, flags=re.IGNORECASE | re.DOTALL)
|
|
151
|
+
if m:
|
|
152
|
+
value = m.group(1).strip()
|
|
153
|
+
if value:
|
|
154
|
+
return _split_spdx_expression(value)
|
|
155
|
+
|
|
156
|
+
m = re.search(r'\blicense\s*=\s*:(\w+)', content, flags=re.DOTALL | re.IGNORECASE)
|
|
157
|
+
if m:
|
|
158
|
+
value = m.group(1).strip()
|
|
159
|
+
if value:
|
|
160
|
+
return _split_spdx_expression(value)
|
|
161
|
+
|
|
162
|
+
return []
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def get_licenses_from_cargo_toml(file_path: str) -> list[str]:
|
|
166
|
+
try:
|
|
167
|
+
data = None
|
|
168
|
+
try:
|
|
169
|
+
import tomllib as toml_loader # Python 3.11+
|
|
170
|
+
with open(file_path, 'rb') as f:
|
|
171
|
+
data = toml_loader.load(f)
|
|
172
|
+
except Exception:
|
|
173
|
+
try:
|
|
174
|
+
import tomli as toml_loader # Backport
|
|
175
|
+
with open(file_path, 'rb') as f:
|
|
176
|
+
data = toml_loader.load(f)
|
|
177
|
+
except Exception:
|
|
178
|
+
data = None
|
|
179
|
+
|
|
180
|
+
if isinstance(data, dict):
|
|
181
|
+
package_tbl = data.get('package') or {}
|
|
182
|
+
license_value = package_tbl.get('license')
|
|
183
|
+
if isinstance(license_value, str) and license_value.strip():
|
|
184
|
+
return _split_spdx_expression(license_value.strip())
|
|
185
|
+
if package_tbl.get('license-file'):
|
|
186
|
+
return []
|
|
187
|
+
except Exception as ex:
|
|
188
|
+
logger.info(f"Failed to parse Cargo.toml via toml parser for {file_path}: {ex}")
|
|
189
|
+
|
|
190
|
+
try:
|
|
191
|
+
with open(file_path, 'r', encoding='utf-8') as f:
|
|
192
|
+
content = f.read()
|
|
193
|
+
pkg_match = re.search(r'^\s*\[package\]\s*(.*?)(?=^\s*\[|\Z)', content, flags=re.MULTILINE | re.DOTALL)
|
|
194
|
+
if not pkg_match:
|
|
195
|
+
return []
|
|
196
|
+
block = pkg_match.group(1)
|
|
197
|
+
m = re.search(r'^\s*license\s*=\s*(?P<q>"""|\'\'\'|"|\')(?P<val>.*?)(?P=q)', block, flags=re.MULTILINE | re.DOTALL)
|
|
198
|
+
if m:
|
|
199
|
+
val = m.group('val').strip()
|
|
200
|
+
if val:
|
|
201
|
+
return _split_spdx_expression(val)
|
|
202
|
+
m2 = re.search(r'^\s*license-file\s*=\s*(?:"""|\'\'\'|"|\')(.*?)(?:"""|\'\'\'|"|\')', block,
|
|
203
|
+
flags=re.MULTILINE | re.DOTALL)
|
|
204
|
+
if m2:
|
|
205
|
+
return []
|
|
206
|
+
except Exception as ex:
|
|
207
|
+
logger.info(f"Failed to parse Cargo.toml {file_path}: {ex}")
|
|
208
|
+
return []
|
|
209
|
+
return []
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def get_manifest_licenses(file_path: str) -> list[str]:
|
|
213
|
+
if file_path.endswith('.pom'):
|
|
214
|
+
try:
|
|
215
|
+
pom_licenses = get_license_from_pom(group_id='', artifact_id='', version='', pom_path=file_path, check_parent=True)
|
|
216
|
+
if not pom_licenses:
|
|
217
|
+
return []
|
|
218
|
+
return [x.strip() for x in pom_licenses.split(', ') if x.strip()]
|
|
219
|
+
except Exception as ex:
|
|
220
|
+
logger.info(f"Failed to extract license from POM {file_path}: {ex}")
|
|
221
|
+
return []
|
|
222
|
+
elif os.path.basename(file_path).lower() == 'package.json':
|
|
223
|
+
try:
|
|
224
|
+
return get_licenses_from_package_json(file_path)
|
|
225
|
+
except Exception as ex:
|
|
226
|
+
logger.info(f"Failed to extract license from package.json {file_path}: {ex}")
|
|
227
|
+
return []
|
|
228
|
+
elif os.path.basename(file_path).lower() == 'setup.cfg':
|
|
229
|
+
try:
|
|
230
|
+
return get_licenses_from_setup_cfg(file_path)
|
|
231
|
+
except Exception as ex:
|
|
232
|
+
logger.info(f"Failed to extract license from setup.cfg {file_path}: {ex}")
|
|
233
|
+
return []
|
|
234
|
+
elif os.path.basename(file_path).lower() == 'setup.py':
|
|
235
|
+
try:
|
|
236
|
+
return get_licenses_from_setup_py(file_path)
|
|
237
|
+
except Exception as ex:
|
|
238
|
+
logger.info(f"Failed to extract license from setup.py {file_path}: {ex}")
|
|
239
|
+
return []
|
|
240
|
+
elif os.path.basename(file_path).lower().endswith('.podspec'):
|
|
241
|
+
try:
|
|
242
|
+
return get_licenses_from_podspec(file_path)
|
|
243
|
+
except Exception as ex:
|
|
244
|
+
logger.info(f"Failed to extract license from podspec {file_path}: {ex}")
|
|
245
|
+
return []
|
|
246
|
+
elif os.path.basename(file_path).lower() == 'cargo.toml':
|
|
247
|
+
try:
|
|
248
|
+
return get_licenses_from_cargo_toml(file_path)
|
|
249
|
+
except Exception as ex:
|
|
250
|
+
logger.info(f"Failed to extract license from Cargo.toml {file_path}: {ex}")
|
|
251
|
+
return []
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Copyright (c) 2023 LG Electronics Inc.
|
|
4
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import logging
|
|
8
|
+
import re
|
|
9
|
+
import fosslight_util.constant as constant
|
|
10
|
+
import mmap
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(constant.LOGGER_NAME)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def get_spdx_downloads(file_path: str) -> list[str]:
|
|
16
|
+
results = []
|
|
17
|
+
find_word = re.compile(rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)", re.IGNORECASE)
|
|
18
|
+
try:
|
|
19
|
+
if os.path.getsize(file_path) > 0:
|
|
20
|
+
with open(file_path, "r") as f:
|
|
21
|
+
with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj:
|
|
22
|
+
for word in find_word.findall(mmap_obj):
|
|
23
|
+
results.append(word.decode('utf-8'))
|
|
24
|
+
except Exception as ex:
|
|
25
|
+
logger.warning(f"Failed to extract SPDX download location. {file_path}, {ex}")
|
|
26
|
+
return results
|
|
@@ -10,6 +10,7 @@ src/fosslight_source/_parsing_scancode_file_item.py
|
|
|
10
10
|
src/fosslight_source/_parsing_scanoss_file.py
|
|
11
11
|
src/fosslight_source/_scan_item.py
|
|
12
12
|
src/fosslight_source/cli.py
|
|
13
|
+
src/fosslight_source/run_manifest_extractor.py
|
|
13
14
|
src/fosslight_source/run_scancode.py
|
|
14
15
|
src/fosslight_source/run_scanoss.py
|
|
15
16
|
src/fosslight_source/run_spdx_extractor.py
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
# Copyright (c) 2023 LG Electronics Inc.
|
|
4
|
-
# SPDX-License-Identifier: Apache-2.0
|
|
5
|
-
|
|
6
|
-
import os
|
|
7
|
-
import logging
|
|
8
|
-
import re
|
|
9
|
-
import fosslight_util.constant as constant
|
|
10
|
-
import mmap
|
|
11
|
-
|
|
12
|
-
logger = logging.getLogger(constant.LOGGER_NAME)
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def get_spdx_downloads(path_to_scan: str, path_to_exclude: set = None) -> dict:
|
|
16
|
-
download_dict = {}
|
|
17
|
-
find_word = re.compile(rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)", re.IGNORECASE)
|
|
18
|
-
abs_path_to_scan = os.path.abspath(path_to_scan)
|
|
19
|
-
|
|
20
|
-
for root, dirs, files in os.walk(path_to_scan):
|
|
21
|
-
for file in files:
|
|
22
|
-
file_path = os.path.join(root, file)
|
|
23
|
-
rel_path_file = os.path.relpath(file_path, abs_path_to_scan).replace('\\', '/')
|
|
24
|
-
if rel_path_file in path_to_exclude:
|
|
25
|
-
continue
|
|
26
|
-
try:
|
|
27
|
-
if os.path.getsize(file_path) > 0:
|
|
28
|
-
with open(file_path, "r") as f:
|
|
29
|
-
with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj:
|
|
30
|
-
for word in find_word.findall(mmap_obj):
|
|
31
|
-
if rel_path_file in download_dict:
|
|
32
|
-
download_dict[rel_path_file].append(word.decode('utf-8'))
|
|
33
|
-
else:
|
|
34
|
-
download_dict[rel_path_file] = [word.decode('utf-8')]
|
|
35
|
-
except Exception as ex:
|
|
36
|
-
logger.warning(f"Failed to extract SPDX download location. {rel_path_file}, {ex}")
|
|
37
|
-
return download_dict
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source/_parsing_scanoss_file.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source.egg-info/requires.txt
RENAMED
|
File without changes
|
{fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|