fosslight-source 2.2.0__py3-none-any.whl → 2.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fosslight_source/_parsing_scancode_file_item.py +3 -91
- fosslight_source/_parsing_scanoss_file.py +4 -18
- fosslight_source/_scan_item.py +1 -81
- fosslight_source/cli.py +68 -36
- fosslight_source/run_manifest_extractor.py +251 -0
- fosslight_source/run_scancode.py +33 -31
- fosslight_source/run_scanoss.py +9 -22
- fosslight_source/run_spdx_extractor.py +11 -35
- {fosslight_source-2.2.0.dist-info → fosslight_source-2.2.2.dist-info}/METADATA +2 -2
- fosslight_source-2.2.2.dist-info/RECORD +17 -0
- fosslight_source-2.2.0.dist-info/RECORD +0 -16
- {fosslight_source-2.2.0.dist-info → fosslight_source-2.2.2.dist-info}/WHEEL +0 -0
- {fosslight_source-2.2.0.dist-info → fosslight_source-2.2.2.dist-info}/entry_points.txt +0 -0
- {fosslight_source-2.2.0.dist-info → fosslight_source-2.2.2.dist-info}/licenses/LICENSE +0 -0
- {fosslight_source-2.2.0.dist-info → fosslight_source-2.2.2.dist-info}/top_level.txt +0 -0
|
@@ -7,15 +7,10 @@ import os
|
|
|
7
7
|
import logging
|
|
8
8
|
import re
|
|
9
9
|
import fosslight_util.constant as constant
|
|
10
|
-
from fosslight_util.get_pom_license import get_license_from_pom
|
|
11
10
|
from ._license_matched import MatchedLicense
|
|
12
11
|
from ._scan_item import SourceItem
|
|
13
|
-
from ._scan_item import is_exclude_dir
|
|
14
|
-
from ._scan_item import is_exclude_file
|
|
15
12
|
from ._scan_item import replace_word
|
|
16
13
|
from ._scan_item import is_notice_file
|
|
17
|
-
from ._scan_item import is_manifest_file
|
|
18
|
-
from ._scan_item import is_package_dir
|
|
19
14
|
from typing import Tuple
|
|
20
15
|
|
|
21
16
|
logger = logging.getLogger(constant.LOGGER_NAME)
|
|
@@ -83,8 +78,6 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
|
|
|
83
78
|
msg = []
|
|
84
79
|
scancode_file_item = []
|
|
85
80
|
license_list = {} # Key :[license]+[matched_text], value: MatchedLicense()
|
|
86
|
-
prev_dir = ""
|
|
87
|
-
prev_dir_value = False
|
|
88
81
|
|
|
89
82
|
if scancode_file_list:
|
|
90
83
|
for file in scancode_file_list:
|
|
@@ -96,22 +89,11 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
|
|
|
96
89
|
is_binary = file.get("is_binary", False)
|
|
97
90
|
if "type" in file:
|
|
98
91
|
is_dir = file["type"] == "directory"
|
|
99
|
-
if is_dir:
|
|
100
|
-
prev_dir_value = is_exclude_dir(file_path)
|
|
101
|
-
prev_dir = file_path
|
|
102
|
-
|
|
103
92
|
if not is_binary and not is_dir:
|
|
104
93
|
licenses = file.get("licenses", [])
|
|
105
94
|
copyright_list = file.get("copyrights", [])
|
|
106
95
|
|
|
107
96
|
result_item = SourceItem(file_path)
|
|
108
|
-
is_pkg, pkg_path = is_package_dir(os.path.dirname(file_path))
|
|
109
|
-
if is_pkg:
|
|
110
|
-
result_item.source_name_or_path = pkg_path
|
|
111
|
-
if not any(x.source_name_or_path == result_item.source_name_or_path for x in scancode_file_item):
|
|
112
|
-
result_item.exclude = True
|
|
113
|
-
scancode_file_item.append(result_item)
|
|
114
|
-
continue
|
|
115
97
|
|
|
116
98
|
if has_error and "scan_errors" in file:
|
|
117
99
|
error_msg = file.get("scan_errors", [])
|
|
@@ -197,35 +179,6 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
|
|
|
197
179
|
if len(license_detected) > 0:
|
|
198
180
|
result_item.licenses = license_detected
|
|
199
181
|
|
|
200
|
-
detected_without_pom = []
|
|
201
|
-
if is_manifest_file(file_path) and len(license_detected) > 0:
|
|
202
|
-
result_item.is_manifest_file = True
|
|
203
|
-
if file_path.endswith('.pom'):
|
|
204
|
-
try:
|
|
205
|
-
pom_licenses = get_license_from_pom(pom_path=file_path, check_parent=False)
|
|
206
|
-
normalize_pom_licenses = []
|
|
207
|
-
if pom_licenses:
|
|
208
|
-
pom_license_list = pom_licenses.split(', ')
|
|
209
|
-
for pom_license in pom_license_list:
|
|
210
|
-
if pom_license not in license_detected:
|
|
211
|
-
for lic_matched_key, lic_info in license_list.items():
|
|
212
|
-
if hasattr(lic_info, 'matched_text') and lic_info.matched_text:
|
|
213
|
-
matched_txt = str(lic_info.matched_text).replace(',', '')
|
|
214
|
-
if pom_license in matched_txt:
|
|
215
|
-
normalize_pom_licenses.append(lic_info.license)
|
|
216
|
-
break
|
|
217
|
-
else:
|
|
218
|
-
normalize_pom_licenses.append(pom_license)
|
|
219
|
-
detected_without_pom = list(set(license_detected) - set(normalize_pom_licenses))
|
|
220
|
-
if detected_without_pom:
|
|
221
|
-
result_item.comment = f"Detected: {', '.join(detected_without_pom)}"
|
|
222
|
-
result_item.licenses = []
|
|
223
|
-
result_item.licenses = normalize_pom_licenses
|
|
224
|
-
if not normalize_pom_licenses:
|
|
225
|
-
result_item.exclude = True
|
|
226
|
-
except Exception as ex:
|
|
227
|
-
logger.info(f"Failed to extract license from POM {file_path}: {ex}")
|
|
228
|
-
|
|
229
182
|
# Remove copyright info for license text file of GPL family
|
|
230
183
|
if should_remove_copyright_for_gpl_license_text(license_detected, result_item.is_license_text):
|
|
231
184
|
logger.debug(f"Removing copyright for GPL family license text file: {file_path}")
|
|
@@ -233,13 +186,11 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
|
|
|
233
186
|
else:
|
|
234
187
|
result_item.copyright = copyright_value_list
|
|
235
188
|
|
|
236
|
-
if len(license_expression_list) > 0
|
|
189
|
+
if len(license_expression_list) > 0:
|
|
237
190
|
license_expression_list = list(
|
|
238
191
|
set(license_expression_list))
|
|
239
192
|
result_item.comment = ','.join(license_expression_list)
|
|
240
193
|
|
|
241
|
-
if is_exclude_file(file_path, prev_dir, prev_dir_value):
|
|
242
|
-
result_item.exclude = True
|
|
243
194
|
scancode_file_item.append(result_item)
|
|
244
195
|
except Exception as ex:
|
|
245
196
|
msg.append(f"Error Parsing item: {ex}")
|
|
@@ -271,17 +222,9 @@ def parsing_scancode_32_later(
|
|
|
271
222
|
is_binary = file.get("is_binary", False)
|
|
272
223
|
is_dir = file.get("type", "") == "directory"
|
|
273
224
|
if (not file_path) or is_binary or is_dir:
|
|
225
|
+
logger.info(f"Skipping {file_path} because it is binary or directory")
|
|
274
226
|
continue
|
|
275
|
-
|
|
276
227
|
result_item = SourceItem(file_path)
|
|
277
|
-
is_pkg, pkg_path = is_package_dir(os.path.dirname(file_path))
|
|
278
|
-
if is_pkg:
|
|
279
|
-
result_item.source_name_or_path = pkg_path
|
|
280
|
-
if not any(x.source_name_or_path == result_item.source_name_or_path for x in scancode_file_item):
|
|
281
|
-
result_item.exclude = True
|
|
282
|
-
scancode_file_item.append(result_item)
|
|
283
|
-
continue
|
|
284
|
-
|
|
285
228
|
if has_error:
|
|
286
229
|
error_msg = file.get("scan_errors", [])
|
|
287
230
|
if error_msg:
|
|
@@ -334,43 +277,12 @@ def parsing_scancode_32_later(
|
|
|
334
277
|
license_list[lic_matched_key] = lic_info
|
|
335
278
|
license_detected.append(found_lic)
|
|
336
279
|
result_item.licenses = license_detected
|
|
337
|
-
|
|
338
|
-
result_item.exclude = is_exclude_file(file_path)
|
|
339
280
|
file_ext = os.path.splitext(file_path)[1].lower()
|
|
340
281
|
is_source_file = file_ext and file_ext in SOURCE_EXTENSIONS
|
|
341
282
|
result_item.is_license_text = is_notice_file(file_path) or (
|
|
342
283
|
file.get("percentage_of_license_text", 0) > 90 and not is_source_file
|
|
343
284
|
)
|
|
344
285
|
|
|
345
|
-
detected_without_pom = []
|
|
346
|
-
if is_manifest_file(file_path) and len(license_detected) > 0:
|
|
347
|
-
result_item.is_manifest_file = True
|
|
348
|
-
if file_path.endswith('.pom'):
|
|
349
|
-
try:
|
|
350
|
-
pom_licenses = get_license_from_pom(pom_path=file_path, check_parent=False)
|
|
351
|
-
normalize_pom_licenses = []
|
|
352
|
-
if pom_licenses:
|
|
353
|
-
pom_license_list = pom_licenses.split(', ')
|
|
354
|
-
for pom_license in pom_license_list:
|
|
355
|
-
if pom_license not in license_detected:
|
|
356
|
-
for lic_matched_key, lic_info in license_list.items():
|
|
357
|
-
if hasattr(lic_info, 'matched_text') and lic_info.matched_text:
|
|
358
|
-
matched_txt = str(lic_info.matched_text).replace(',', '')
|
|
359
|
-
if pom_license in matched_txt:
|
|
360
|
-
normalize_pom_licenses.append(lic_info.license)
|
|
361
|
-
break
|
|
362
|
-
else:
|
|
363
|
-
normalize_pom_licenses.append(pom_license)
|
|
364
|
-
detected_without_pom = list(set(license_detected) - set(normalize_pom_licenses))
|
|
365
|
-
if detected_without_pom:
|
|
366
|
-
result_item.comment = f"Detected: {', '.join(detected_without_pom)}"
|
|
367
|
-
result_item.licenses = []
|
|
368
|
-
result_item.licenses = normalize_pom_licenses
|
|
369
|
-
if not normalize_pom_licenses:
|
|
370
|
-
result_item.exclude = True
|
|
371
|
-
except Exception as ex:
|
|
372
|
-
logger.info(f"Failed to extract license from POM {file_path}: {ex}")
|
|
373
|
-
|
|
374
286
|
# Remove copyright info for license text file of GPL family
|
|
375
287
|
if should_remove_copyright_for_gpl_license_text(license_detected, result_item.is_license_text):
|
|
376
288
|
logger.debug(f"Removing copyright for GPL family license text file: {file_path}")
|
|
@@ -378,7 +290,7 @@ def parsing_scancode_32_later(
|
|
|
378
290
|
else:
|
|
379
291
|
result_item.copyright = copyright_value_list
|
|
380
292
|
|
|
381
|
-
if len(license_detected) > 1
|
|
293
|
+
if len(license_detected) > 1:
|
|
382
294
|
license_expression_spdx = file.get("detected_license_expression_spdx", "")
|
|
383
295
|
license_expression = file.get("detected_license_expression", "")
|
|
384
296
|
if license_expression_spdx:
|
|
@@ -3,12 +3,9 @@
|
|
|
3
3
|
# Copyright (c) 2020 LG Electronics Inc.
|
|
4
4
|
# SPDX-License-Identifier: Apache-2.0
|
|
5
5
|
|
|
6
|
-
import os
|
|
7
6
|
import logging
|
|
8
7
|
import fosslight_util.constant as constant
|
|
9
8
|
from ._scan_item import SourceItem
|
|
10
|
-
from ._scan_item import is_exclude_file
|
|
11
|
-
from ._scan_item import is_package_dir
|
|
12
9
|
from ._scan_item import replace_word
|
|
13
10
|
from typing import Tuple
|
|
14
11
|
|
|
@@ -18,7 +15,7 @@ SCANOSS_INFO_HEADER = ['No', 'Source Path', 'Component Declared', 'SPDX Tag',
|
|
|
18
15
|
'Matched Rate (line number)', 'scanoss_fileURL']
|
|
19
16
|
|
|
20
17
|
|
|
21
|
-
def
|
|
18
|
+
def parsing_extra_info(scanned_result: dict) -> list:
|
|
22
19
|
scanoss_extra_info = []
|
|
23
20
|
for scan_item in scanned_result:
|
|
24
21
|
license_w_source = scan_item.scanoss_reference
|
|
@@ -37,22 +34,14 @@ def parsing_extraInfo(scanned_result: dict) -> list:
|
|
|
37
34
|
return scanoss_extra_info
|
|
38
35
|
|
|
39
36
|
|
|
40
|
-
def
|
|
37
|
+
def parsing_scan_result(scanoss_report: dict, excluded_files: set = None) -> Tuple[bool, list]:
|
|
41
38
|
scanoss_file_item = []
|
|
42
|
-
abs_path_to_exclude = [os.path.abspath(os.path.join(path_to_scan, path)) for path in path_to_exclude]
|
|
43
39
|
|
|
44
40
|
for file_path, findings in scanoss_report.items():
|
|
45
|
-
|
|
46
|
-
if
|
|
41
|
+
file_path_normalized = file_path.replace('\\', '/')
|
|
42
|
+
if file_path_normalized in excluded_files:
|
|
47
43
|
continue
|
|
48
44
|
result_item = SourceItem(file_path)
|
|
49
|
-
is_pkg, pkg_path = is_package_dir(os.path.dirname(file_path))
|
|
50
|
-
if is_pkg:
|
|
51
|
-
result_item.source_name_or_path = pkg_path
|
|
52
|
-
if not any(x.source_name_or_path == result_item.source_name_or_path for x in scanoss_file_item):
|
|
53
|
-
result_item.exclude = True
|
|
54
|
-
scanoss_file_item.append(result_item)
|
|
55
|
-
continue
|
|
56
45
|
|
|
57
46
|
if 'id' in findings[0]:
|
|
58
47
|
if "none" == findings[0]['id']:
|
|
@@ -86,9 +75,6 @@ def parsing_scanResult(scanoss_report: dict, path_to_scan: str = "", path_to_exc
|
|
|
86
75
|
result_item.licenses = license_detected
|
|
87
76
|
result_item.scanoss_reference = license_w_source
|
|
88
77
|
|
|
89
|
-
if is_exclude_file(file_path):
|
|
90
|
-
result_item.exclude = True
|
|
91
|
-
|
|
92
78
|
if 'file_url' in findings[0]:
|
|
93
79
|
result_item.fileURL = findings[0]['file_url']
|
|
94
80
|
if 'matched' in findings[0]:
|
fosslight_source/_scan_item.py
CHANGED
|
@@ -18,15 +18,7 @@ replace_word = ["-only", "-old-style", "-or-later", "licenseref-scancode-", "lic
|
|
|
18
18
|
_notice_filename = ['licen[cs]e[s]?', 'notice[s]?', 'legal', 'copyright[s]?', 'copying*', 'patent[s]?', 'unlicen[cs]e', 'eula',
|
|
19
19
|
'[a,l]?gpl[-]?[1-3]?[.,-,_]?[0-1]?', 'mit', 'bsd[-]?[0-4]?', 'bsd[-]?[0-4][-]?clause[s]?',
|
|
20
20
|
'apache[-,_]?[1-2]?[.,-,_]?[0-2]?']
|
|
21
|
-
_manifest_filename = [r'.*\.pom$', r'package\.json$', r'setup\.py$', r'
|
|
22
|
-
_exclude_filename = ["changelog", "config.guess", "config.sub", "changes", "ltmain.sh",
|
|
23
|
-
"configure", "configure.ac", "depcomp", "compile", "missing", "makefile"]
|
|
24
|
-
_exclude_extension = [".m4", ".in", ".po"]
|
|
25
|
-
_exclude_directory = ["test", "tests", "doc", "docs"]
|
|
26
|
-
_exclude_directory = [os.path.sep + dir_name +
|
|
27
|
-
os.path.sep for dir_name in _exclude_directory]
|
|
28
|
-
_exclude_directory.append("/.")
|
|
29
|
-
_package_directory = ["node_modules", "venv", "Pods", "Carthage"]
|
|
21
|
+
_manifest_filename = [r'.*\.pom$', r'package\.json$', r'setup\.py$', r'setup\.cfg$', r'.*\.podspec$', r'Cargo\.toml$']
|
|
30
22
|
MAX_LICENSE_LENGTH = 200
|
|
31
23
|
MAX_LICENSE_TOTAL_LENGTH = 600
|
|
32
24
|
SUBSTRING_LICENSE_COMMENT = "Maximum character limit (License)"
|
|
@@ -209,39 +201,6 @@ class SourceItem(FileItem):
|
|
|
209
201
|
return self.source_name_or_path == other.source_name_or_path
|
|
210
202
|
|
|
211
203
|
|
|
212
|
-
def is_exclude_dir(dir_path: str) -> bool:
|
|
213
|
-
if dir_path:
|
|
214
|
-
dir_path = dir_path.lower()
|
|
215
|
-
dir_path = dir_path if dir_path.endswith(
|
|
216
|
-
os.path.sep) else dir_path + os.path.sep
|
|
217
|
-
dir_path = dir_path if dir_path.startswith(
|
|
218
|
-
os.path.sep) else os.path.sep + dir_path
|
|
219
|
-
return any(dir_name in dir_path for dir_name in _exclude_directory)
|
|
220
|
-
return False
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
def is_exclude_file(file_path: str, prev_dir: str = None, prev_dir_exclude_value: bool = None) -> bool:
|
|
224
|
-
file_path = file_path.lower()
|
|
225
|
-
filename = os.path.basename(file_path)
|
|
226
|
-
if os.path.splitext(filename)[1] in _exclude_extension:
|
|
227
|
-
return True
|
|
228
|
-
if filename.startswith('.') or filename in _exclude_filename:
|
|
229
|
-
return True
|
|
230
|
-
|
|
231
|
-
dir_path = os.path.dirname(file_path)
|
|
232
|
-
if prev_dir is not None: # running ScanCode
|
|
233
|
-
if dir_path == prev_dir:
|
|
234
|
-
return prev_dir_exclude_value
|
|
235
|
-
else:
|
|
236
|
-
# There will be no execution of this else statement.
|
|
237
|
-
# Because scancode json output results are sorted by path,
|
|
238
|
-
# most of them will match the previous if statement.
|
|
239
|
-
return is_exclude_dir(dir_path)
|
|
240
|
-
else: # running SCANOSS
|
|
241
|
-
return is_exclude_dir(dir_path)
|
|
242
|
-
return False
|
|
243
|
-
|
|
244
|
-
|
|
245
204
|
def is_notice_file(file_path: str) -> bool:
|
|
246
205
|
pattern = r"({})(?<!w)".format("|".join(_notice_filename))
|
|
247
206
|
filename = os.path.basename(file_path)
|
|
@@ -252,42 +211,3 @@ def is_manifest_file(file_path: str) -> bool:
|
|
|
252
211
|
pattern = r"({})$".format("|".join(_manifest_filename))
|
|
253
212
|
filename = os.path.basename(file_path)
|
|
254
213
|
return bool(re.match(pattern, filename, re.IGNORECASE))
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
def is_package_dir(dir_path: str) -> bool:
|
|
258
|
-
# scancode and scanoss use '/' as path separator regardless of OS
|
|
259
|
-
dir_path = dir_path.replace('\\', '/')
|
|
260
|
-
path_parts = dir_path.split('/')
|
|
261
|
-
|
|
262
|
-
for pkg_dir in _package_directory:
|
|
263
|
-
if pkg_dir in path_parts:
|
|
264
|
-
pkg_index = path_parts.index(pkg_dir)
|
|
265
|
-
pkg_path = '/'.join(path_parts[:pkg_index + 1])
|
|
266
|
-
return True, pkg_path
|
|
267
|
-
return False, ""
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
def _has_parent_in_exclude_list(rel_path: str, path_to_exclude: list) -> bool:
|
|
271
|
-
path_parts = rel_path.replace('\\', '/').split('/')
|
|
272
|
-
for i in range(1, len(path_parts)):
|
|
273
|
-
parent_path = '/'.join(path_parts[:i])
|
|
274
|
-
if parent_path in path_to_exclude:
|
|
275
|
-
return True
|
|
276
|
-
return False
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
def get_excluded_paths(path_to_scan: str, custom_excluded_paths: list = []) -> list:
|
|
280
|
-
path_to_exclude = custom_excluded_paths.copy()
|
|
281
|
-
abs_path_to_scan = os.path.abspath(path_to_scan)
|
|
282
|
-
|
|
283
|
-
for root, dirs, files in os.walk(path_to_scan):
|
|
284
|
-
for dir_name in dirs:
|
|
285
|
-
dir_path = os.path.join(root, dir_name)
|
|
286
|
-
rel_path = os.path.relpath(dir_path, abs_path_to_scan)
|
|
287
|
-
if not _has_parent_in_exclude_list(rel_path, path_to_exclude):
|
|
288
|
-
if dir_name in _package_directory:
|
|
289
|
-
path_to_exclude.append(rel_path)
|
|
290
|
-
elif is_exclude_dir(rel_path):
|
|
291
|
-
path_to_exclude.append(rel_path)
|
|
292
|
-
|
|
293
|
-
return path_to_exclude
|
fosslight_source/cli.py
CHANGED
|
@@ -14,21 +14,23 @@ from datetime import datetime
|
|
|
14
14
|
import fosslight_util.constant as constant
|
|
15
15
|
from fosslight_util.set_log import init_log
|
|
16
16
|
from fosslight_util.timer_thread import TimerThread
|
|
17
|
-
from fosslight_util.exclude import excluding_files
|
|
18
17
|
from ._help import print_version, print_help_msg_source_scanner
|
|
19
18
|
from ._license_matched import get_license_list_to_print
|
|
20
19
|
from fosslight_util.output_format import check_output_formats_v2, write_output_file
|
|
21
20
|
from fosslight_util.correct import correct_with_yaml
|
|
22
21
|
from .run_scancode import run_scan
|
|
23
|
-
from .
|
|
22
|
+
from fosslight_util.exclude import get_excluded_paths
|
|
24
23
|
from .run_scanoss import run_scanoss_py
|
|
25
24
|
from .run_scanoss import get_scanoss_extra_info
|
|
26
25
|
import yaml
|
|
27
26
|
import argparse
|
|
28
27
|
from .run_spdx_extractor import get_spdx_downloads
|
|
28
|
+
from .run_manifest_extractor import get_manifest_licenses
|
|
29
29
|
from ._scan_item import SourceItem, KB_URL
|
|
30
30
|
from fosslight_util.oss_item import ScannerItem
|
|
31
31
|
from typing import Tuple
|
|
32
|
+
from ._scan_item import is_manifest_file
|
|
33
|
+
|
|
32
34
|
|
|
33
35
|
SRC_SHEET_NAME = 'SRC_FL_Source'
|
|
34
36
|
SCANOSS_HEADER = {SRC_SHEET_NAME: ['ID', 'Source Path', 'OSS Name',
|
|
@@ -38,6 +40,9 @@ MERGED_HEADER = {SRC_SHEET_NAME: ['ID', 'Source Path', 'OSS Name',
|
|
|
38
40
|
'OSS Version', 'License', 'Download Location',
|
|
39
41
|
'Homepage', 'Copyright Text', 'Exclude', 'Comment', 'license_reference']}
|
|
40
42
|
SCANNER_TYPE = ['kb', 'scancode', 'scanoss', 'all']
|
|
43
|
+
EXCLUDE_FILENAME = ["changelog", "config.guess", "config.sub", "changes", "ltmain.sh",
|
|
44
|
+
"configure", "configure.ac", "depcomp", "compile", "missing", "Makefile"]
|
|
45
|
+
EXCLUDE_FILE_EXTENSION = [".m4", ".in", ".po"]
|
|
41
46
|
|
|
42
47
|
logger = logging.getLogger(constant.LOGGER_NAME)
|
|
43
48
|
warnings.filterwarnings("ignore", category=FutureWarning)
|
|
@@ -126,23 +131,6 @@ def main() -> None:
|
|
|
126
131
|
sys.exit(1)
|
|
127
132
|
|
|
128
133
|
|
|
129
|
-
def count_files(path_to_scan: str, path_to_exclude: list) -> Tuple[int, int]:
|
|
130
|
-
total_files = 0
|
|
131
|
-
excluded_files = 0
|
|
132
|
-
abs_path_to_exclude = [os.path.abspath(os.path.join(path_to_scan, path)) for path in path_to_exclude]
|
|
133
|
-
|
|
134
|
-
for root, _, files in os.walk(path_to_scan):
|
|
135
|
-
for file in files:
|
|
136
|
-
file_path = os.path.join(root, file)
|
|
137
|
-
abs_file_path = os.path.abspath(file_path)
|
|
138
|
-
if any(os.path.commonpath([abs_file_path, exclude_path]) == exclude_path
|
|
139
|
-
for exclude_path in abs_path_to_exclude):
|
|
140
|
-
excluded_files += 1
|
|
141
|
-
total_files += 1
|
|
142
|
-
|
|
143
|
-
return total_files, excluded_files
|
|
144
|
-
|
|
145
|
-
|
|
146
134
|
def create_report_file(
|
|
147
135
|
_start_time: str, merged_result: list,
|
|
148
136
|
license_list: list, scanoss_result: list,
|
|
@@ -150,7 +138,7 @@ def create_report_file(
|
|
|
150
138
|
output_path: str = "", output_files: list = [],
|
|
151
139
|
output_extensions: list = [], correct_mode: bool = True,
|
|
152
140
|
correct_filepath: str = "", path_to_scan: str = "", path_to_exclude: list = [],
|
|
153
|
-
formats: list = [],
|
|
141
|
+
formats: list = [], api_limit_exceed: bool = False, files_count: int = 0
|
|
154
142
|
) -> 'ScannerItem':
|
|
155
143
|
"""
|
|
156
144
|
Create report files for given scanned result.
|
|
@@ -209,7 +197,6 @@ def create_report_file(
|
|
|
209
197
|
|
|
210
198
|
scan_item = ScannerItem(PKG_NAME, _start_time)
|
|
211
199
|
scan_item.set_cover_pathinfo(path_to_scan, path_to_exclude)
|
|
212
|
-
files_count, _ = count_files(path_to_scan, path_to_exclude)
|
|
213
200
|
scan_item.set_cover_comment(f"Scanned files: {files_count}")
|
|
214
201
|
|
|
215
202
|
if api_limit_exceed:
|
|
@@ -223,12 +210,6 @@ def create_report_file(
|
|
|
223
210
|
|
|
224
211
|
if merged_result:
|
|
225
212
|
sheet_list = {}
|
|
226
|
-
# Remove results that are in excluding file list
|
|
227
|
-
for i in range(len(merged_result) - 1, -1, -1): # Iterate from last to first
|
|
228
|
-
item_path = merged_result[i].source_name_or_path # Assuming SourceItem has 'file_path' attribute
|
|
229
|
-
if item_path in excluded_file_list:
|
|
230
|
-
del merged_result[i] # Delete matching item
|
|
231
|
-
|
|
232
213
|
scan_item.append_file_items(merged_result, PKG_NAME)
|
|
233
214
|
|
|
234
215
|
if selected_scanner == 'scanoss':
|
|
@@ -287,7 +268,7 @@ def check_kb_server_reachable() -> bool:
|
|
|
287
268
|
|
|
288
269
|
def merge_results(
|
|
289
270
|
scancode_result: list = [], scanoss_result: list = [], spdx_downloads: dict = {},
|
|
290
|
-
path_to_scan: str = "", run_kb: bool = False
|
|
271
|
+
path_to_scan: str = "", run_kb: bool = False, manifest_licenses: dict = {}
|
|
291
272
|
) -> list:
|
|
292
273
|
|
|
293
274
|
"""
|
|
@@ -313,6 +294,19 @@ def merge_results(
|
|
|
313
294
|
new_result_item = SourceItem(file_name)
|
|
314
295
|
new_result_item.download_location = download_location
|
|
315
296
|
scancode_result.append(new_result_item)
|
|
297
|
+
if manifest_licenses:
|
|
298
|
+
for file_name, licenses in manifest_licenses.items():
|
|
299
|
+
if file_name in scancode_result:
|
|
300
|
+
merged_result_item = scancode_result[scancode_result.index(file_name)]
|
|
301
|
+
# overwrite existing detected licenses with manifest-provided licenses
|
|
302
|
+
merged_result_item.licenses = [] # clear existing licenses (setter clears when value falsy)
|
|
303
|
+
merged_result_item.licenses = licenses
|
|
304
|
+
merged_result_item.is_manifest_file = True
|
|
305
|
+
else:
|
|
306
|
+
new_result_item = SourceItem(file_name)
|
|
307
|
+
new_result_item.licenses = licenses
|
|
308
|
+
new_result_item.is_manifest_file = True
|
|
309
|
+
scancode_result.append(new_result_item)
|
|
316
310
|
if run_kb and not check_kb_server_reachable():
|
|
317
311
|
run_kb = False
|
|
318
312
|
if run_kb:
|
|
@@ -365,15 +359,17 @@ def run_scanners(
|
|
|
365
359
|
|
|
366
360
|
logger, result_log = init_log(os.path.join(output_path, f"fosslight_log_src_{start_time}.txt"),
|
|
367
361
|
True, logging.INFO, logging.DEBUG, PKG_NAME, path_to_scan, path_to_exclude)
|
|
368
|
-
excluded_file_list = excluding_files(path_to_exclude, path_to_scan)
|
|
369
362
|
|
|
370
363
|
if '.xlsx' not in output_extensions and print_matched_text:
|
|
371
364
|
logger.warning("-m option is only available for excel.")
|
|
372
365
|
print_matched_text = False
|
|
373
366
|
|
|
374
367
|
if success:
|
|
375
|
-
|
|
368
|
+
path_to_exclude_with_filename = path_to_exclude + EXCLUDE_FILENAME
|
|
369
|
+
excluded_path_with_default_exclusion, excluded_path_without_dot, excluded_files, cnt_file_except_skipped = (
|
|
370
|
+
get_excluded_paths(path_to_scan, path_to_exclude_with_filename, EXCLUDE_FILE_EXTENSION))
|
|
376
371
|
logger.debug(f"Skipped paths: {excluded_path_with_default_exclusion}")
|
|
372
|
+
|
|
377
373
|
if not selected_scanner:
|
|
378
374
|
selected_scanner = 'all'
|
|
379
375
|
if selected_scanner in ['scancode', 'all', 'kb']:
|
|
@@ -381,18 +377,21 @@ def run_scanners(
|
|
|
381
377
|
write_json_file, num_cores, True,
|
|
382
378
|
print_matched_text, formats, called_by_cli,
|
|
383
379
|
time_out, correct_mode, correct_filepath,
|
|
384
|
-
excluded_path_with_default_exclusion
|
|
380
|
+
excluded_path_with_default_exclusion,
|
|
381
|
+
excluded_files)
|
|
382
|
+
excluded_files = set(excluded_files) if excluded_files else set()
|
|
385
383
|
if selected_scanner in ['scanoss', 'all']:
|
|
386
384
|
scanoss_result, api_limit_exceed = run_scanoss_py(path_to_scan, output_file_name, formats, True, write_json_file,
|
|
387
|
-
num_cores, excluded_path_with_default_exclusion)
|
|
385
|
+
num_cores, excluded_path_with_default_exclusion, excluded_files)
|
|
388
386
|
if selected_scanner in SCANNER_TYPE:
|
|
389
387
|
run_kb = True if selected_scanner in ['kb', 'all'] else False
|
|
390
|
-
spdx_downloads =
|
|
391
|
-
merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads,
|
|
388
|
+
spdx_downloads, manifest_licenses = metadata_collector(path_to_scan, excluded_files)
|
|
389
|
+
merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads,
|
|
390
|
+
path_to_scan, run_kb, manifest_licenses)
|
|
392
391
|
scan_item = create_report_file(start_time, merged_result, license_list, scanoss_result, selected_scanner,
|
|
393
392
|
print_matched_text, output_path, output_files, output_extensions, correct_mode,
|
|
394
|
-
correct_filepath, path_to_scan,
|
|
395
|
-
api_limit_exceed)
|
|
393
|
+
correct_filepath, path_to_scan, excluded_path_without_dot, formats,
|
|
394
|
+
api_limit_exceed, cnt_file_except_skipped)
|
|
396
395
|
else:
|
|
397
396
|
print_help_msg_source_scanner()
|
|
398
397
|
result_log[RESULT_KEY] = "Unsupported scanner"
|
|
@@ -403,5 +402,38 @@ def run_scanners(
|
|
|
403
402
|
return success, result_log.get(RESULT_KEY, ""), scan_item, license_list, scanoss_result
|
|
404
403
|
|
|
405
404
|
|
|
405
|
+
def metadata_collector(path_to_scan: str, excluded_files: set) -> dict:
|
|
406
|
+
"""
|
|
407
|
+
Collect metadata for merging.
|
|
408
|
+
|
|
409
|
+
- Traverse files with exclusions applied
|
|
410
|
+
- spdx_downloads: {rel_path: [download_urls]}
|
|
411
|
+
- manifest_licenses: {rel_path: [license_names]}
|
|
412
|
+
|
|
413
|
+
:return: (spdx_downloads, manifest_licenses)
|
|
414
|
+
"""
|
|
415
|
+
abs_path_to_scan = os.path.abspath(path_to_scan)
|
|
416
|
+
spdx_downloads = {}
|
|
417
|
+
manifest_licenses = {}
|
|
418
|
+
|
|
419
|
+
for root, dirs, files in os.walk(path_to_scan):
|
|
420
|
+
for file in files:
|
|
421
|
+
file_path = os.path.join(root, file)
|
|
422
|
+
rel_path_file = os.path.relpath(file_path, abs_path_to_scan).replace('\\', '/')
|
|
423
|
+
if rel_path_file in excluded_files:
|
|
424
|
+
continue
|
|
425
|
+
|
|
426
|
+
downloads = get_spdx_downloads(file_path)
|
|
427
|
+
if downloads:
|
|
428
|
+
spdx_downloads[rel_path_file] = downloads
|
|
429
|
+
|
|
430
|
+
if is_manifest_file(file_path):
|
|
431
|
+
licenses = get_manifest_licenses(file_path)
|
|
432
|
+
if licenses:
|
|
433
|
+
manifest_licenses[rel_path_file] = licenses
|
|
434
|
+
|
|
435
|
+
return spdx_downloads, manifest_licenses
|
|
436
|
+
|
|
437
|
+
|
|
406
438
|
if __name__ == '__main__':
|
|
407
439
|
main()
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Copyright (c) 2025 LG Electronics Inc.
|
|
4
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
5
|
+
import os
|
|
6
|
+
import json
|
|
7
|
+
import re
|
|
8
|
+
import logging
|
|
9
|
+
from fosslight_util.get_pom_license import get_license_from_pom
|
|
10
|
+
import fosslight_util.constant as constant
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(constant.LOGGER_NAME)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _split_spdx_expression(value: str) -> list[str]:
|
|
16
|
+
parts = re.split(r'\s+(?:OR|AND)\s+|[|]{2}|&&', value, flags=re.IGNORECASE)
|
|
17
|
+
tokens: list[str] = []
|
|
18
|
+
for part in parts:
|
|
19
|
+
token = part.strip().strip('()')
|
|
20
|
+
if token:
|
|
21
|
+
tokens.append(token)
|
|
22
|
+
unique: list[str] = []
|
|
23
|
+
for t in tokens:
|
|
24
|
+
if t not in unique:
|
|
25
|
+
unique.append(t)
|
|
26
|
+
return unique
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def get_licenses_from_package_json(file_path: str) -> list[str]:
|
|
30
|
+
try:
|
|
31
|
+
with open(file_path, 'r', encoding='utf-8') as f:
|
|
32
|
+
data = json.load(f)
|
|
33
|
+
except Exception as ex:
|
|
34
|
+
logger.info(f"Failed to read package.json {file_path}: {ex}")
|
|
35
|
+
return []
|
|
36
|
+
|
|
37
|
+
if not isinstance(data, dict):
|
|
38
|
+
return []
|
|
39
|
+
|
|
40
|
+
licenses: list[str] = []
|
|
41
|
+
license_field = data.get('license')
|
|
42
|
+
|
|
43
|
+
if isinstance(license_field, str):
|
|
44
|
+
value = license_field.strip()
|
|
45
|
+
if value.upper() == 'UNLICENSED':
|
|
46
|
+
return []
|
|
47
|
+
if value.upper().startswith('SEE LICENSE IN'):
|
|
48
|
+
return []
|
|
49
|
+
licenses.extend(_split_spdx_expression(value))
|
|
50
|
+
elif isinstance(license_field, dict):
|
|
51
|
+
type_val = license_field.get('type')
|
|
52
|
+
if isinstance(type_val, str):
|
|
53
|
+
type_val = type_val.strip()
|
|
54
|
+
if type_val and type_val.upper() != 'UNLICENSED':
|
|
55
|
+
licenses.append(type_val)
|
|
56
|
+
|
|
57
|
+
if not licenses:
|
|
58
|
+
legacy = data.get('licenses')
|
|
59
|
+
if isinstance(legacy, list):
|
|
60
|
+
for item in legacy:
|
|
61
|
+
if isinstance(item, str):
|
|
62
|
+
token = item.strip()
|
|
63
|
+
if token and token.upper() != 'UNLICENSED':
|
|
64
|
+
licenses.append(token)
|
|
65
|
+
elif isinstance(item, dict):
|
|
66
|
+
t = item.get('type')
|
|
67
|
+
if isinstance(t, str):
|
|
68
|
+
t = t.strip()
|
|
69
|
+
if t and t.upper() != 'UNLICENSED':
|
|
70
|
+
licenses.append(t)
|
|
71
|
+
|
|
72
|
+
unique: list[str] = []
|
|
73
|
+
for lic in licenses:
|
|
74
|
+
if lic not in unique:
|
|
75
|
+
unique.append(lic)
|
|
76
|
+
return unique
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def get_licenses_from_setup_cfg(file_path: str) -> list[str]:
|
|
80
|
+
try:
|
|
81
|
+
import configparser
|
|
82
|
+
parser = configparser.ConfigParser()
|
|
83
|
+
parser.read(file_path, encoding='utf-8')
|
|
84
|
+
if parser.has_section('metadata'):
|
|
85
|
+
license_value = parser.get('metadata', 'license', fallback='').strip()
|
|
86
|
+
if license_value:
|
|
87
|
+
return _split_spdx_expression(license_value)
|
|
88
|
+
except Exception as ex:
|
|
89
|
+
logger.info(f"Failed to parse setup.cfg with configparser for {file_path}: {ex}")
|
|
90
|
+
|
|
91
|
+
try:
|
|
92
|
+
with open(file_path, 'r', encoding='utf-8') as f:
|
|
93
|
+
content = f.read()
|
|
94
|
+
meta_match = re.search(r'^\s*\[metadata\]\s*(.*?)(?=^\s*\[|\Z)', content, flags=re.MULTILINE | re.DOTALL)
|
|
95
|
+
if not meta_match:
|
|
96
|
+
return []
|
|
97
|
+
block = meta_match.group(1)
|
|
98
|
+
m = re.search(r'^\s*license\s*=\s*(.+)$', block, flags=re.MULTILINE)
|
|
99
|
+
if not m:
|
|
100
|
+
return []
|
|
101
|
+
val = m.group(1).strip()
|
|
102
|
+
if (len(val) >= 2) and ((val[0] == val[-1]) and val[0] in ('"', "'")):
|
|
103
|
+
val = val[1:-1].strip()
|
|
104
|
+
if not val:
|
|
105
|
+
return []
|
|
106
|
+
return _split_spdx_expression(val)
|
|
107
|
+
except Exception as ex:
|
|
108
|
+
logger.info(f"Failed to parse setup.cfg {file_path} via regex fallback: {ex}")
|
|
109
|
+
return []
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def get_licenses_from_setup_py(file_path: str) -> list[str]:
    """Extract license identifiers from a setup.py file.

    Scans the raw source for a quoted ``license=...`` keyword argument and
    splits the captured value into individual license identifiers.
    Returns an empty list when the file cannot be read or no license is found.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as setup_file:
            source = setup_file.read()
    except Exception as ex:
        logger.info(f"Failed to read setup.py {file_path}: {ex}")
        return []

    # The backreference \1 makes the closing quote(s) match the opening ones.
    found = re.search(r'license\s*=\s*([\'"]{1,3})(.+?)\1', source, flags=re.IGNORECASE | re.DOTALL)
    if found is None:
        return []
    license_text = found.group(2).strip()
    return _split_spdx_expression(license_text) if license_text else []
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def get_licenses_from_podspec(file_path: str) -> list[str]:
    """Extract license identifiers from a CocoaPods .podspec file.

    Handles the common Ruby forms of the ``license`` attribute:
      * license = 'MIT'
      * license = { :type => 'MIT', ... }   (hash-rocket syntax)
      * license = { type: 'MIT', ... }      (Ruby 1.9 hash syntax)
      * license = { :type => :mit, ... }    (symbol value)
      * license = :mit

    Returns an empty list when the file cannot be read or no license is found.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except Exception as ex:
        logger.info(f"Failed to read podspec {file_path}: {ex}")
        return []

    # license = 'MIT'
    m = re.search(r'\blicense\s*=\s*([\'"])(.+?)\1', content, flags=re.IGNORECASE)
    if m:
        value = m.group(2).strip()
        if value:
            return _split_spdx_expression(value)

    # license = { :type => 'MIT' }  or  license = { type: 'MIT' }
    # Accept both the hash-rocket and the Ruby 1.9 key syntax for the type key.
    m = re.search(r'\blicense\s*=\s*\{[^}]*?(?::type\s*=>|\btype\s*:)\s*([\'"])(.+?)\1', content,
                  flags=re.IGNORECASE | re.DOTALL)
    if m:
        value = m.group(2).strip()
        if value:
            return _split_spdx_expression(value)

    # license = { :type => :mit }  or  license = { type: :mit }
    m = re.search(r'\blicense\s*=\s*\{[^}]*?(?::type\s*=>|\btype\s*:)\s*:(\w+)', content,
                  flags=re.IGNORECASE | re.DOTALL)
    if m:
        value = m.group(1).strip()
        if value:
            return _split_spdx_expression(value)

    # license = :mit
    m = re.search(r'\blicense\s*=\s*:(\w+)', content, flags=re.DOTALL | re.IGNORECASE)
    if m:
        value = m.group(1).strip()
        if value:
            return _split_spdx_expression(value)

    return []
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def get_licenses_from_cargo_toml(file_path: str) -> list[str]:
    """Extract license identifiers from a Cargo.toml file.

    Prefers a real TOML parser (tomllib on Python 3.11+, tomli as a backport)
    and otherwise falls back to a regex scan of the [package] section.
    A package that only declares ``license-file`` yields no identifiers.
    Returns an empty list when nothing can be extracted.
    """
    try:
        parsed = None
        # Try the stdlib parser first, then the backport; any failure
        # (missing module or unreadable file) just leaves parsed as None.
        for module_name in ('tomllib', 'tomli'):
            try:
                toml_loader = __import__(module_name)
                with open(file_path, 'rb') as f:
                    parsed = toml_loader.load(f)
                break
            except Exception:
                parsed = None

        if isinstance(parsed, dict):
            package_info = parsed.get('package') or {}
            declared = package_info.get('license')
            if isinstance(declared, str) and declared.strip():
                return _split_spdx_expression(declared.strip())
            if package_info.get('license-file'):
                # Only a license file path is given; no expression to split.
                return []
    except Exception as ex:
        logger.info(f"Failed to parse Cargo.toml via toml parser for {file_path}: {ex}")

    # Fallback: scan the [package] section textually.
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            raw = f.read()
        package_block = re.search(r'^\s*\[package\]\s*(.*?)(?=^\s*\[|\Z)', raw, flags=re.MULTILINE | re.DOTALL)
        if package_block is None:
            return []
        section = package_block.group(1)
        license_match = re.search(r'^\s*license\s*=\s*(?P<q>"""|\'\'\'|"|\')(?P<val>.*?)(?P=q)', section,
                                  flags=re.MULTILINE | re.DOTALL)
        if license_match:
            declared = license_match.group('val').strip()
            if declared:
                return _split_spdx_expression(declared)
        if re.search(r'^\s*license-file\s*=\s*(?:"""|\'\'\'|"|\')(.*?)(?:"""|\'\'\'|"|\')', section,
                     flags=re.MULTILINE | re.DOTALL):
            return []
    except Exception as ex:
        logger.info(f"Failed to parse Cargo.toml {file_path}: {ex}")
        return []
    return []
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def get_manifest_licenses(file_path: str) -> list[str]:
|
|
213
|
+
if file_path.endswith('.pom'):
|
|
214
|
+
try:
|
|
215
|
+
pom_licenses = get_license_from_pom(group_id='', artifact_id='', version='', pom_path=file_path, check_parent=True)
|
|
216
|
+
if not pom_licenses:
|
|
217
|
+
return []
|
|
218
|
+
return [x.strip() for x in pom_licenses.split(', ') if x.strip()]
|
|
219
|
+
except Exception as ex:
|
|
220
|
+
logger.info(f"Failed to extract license from POM {file_path}: {ex}")
|
|
221
|
+
return []
|
|
222
|
+
elif os.path.basename(file_path).lower() == 'package.json':
|
|
223
|
+
try:
|
|
224
|
+
return get_licenses_from_package_json(file_path)
|
|
225
|
+
except Exception as ex:
|
|
226
|
+
logger.info(f"Failed to extract license from package.json {file_path}: {ex}")
|
|
227
|
+
return []
|
|
228
|
+
elif os.path.basename(file_path).lower() == 'setup.cfg':
|
|
229
|
+
try:
|
|
230
|
+
return get_licenses_from_setup_cfg(file_path)
|
|
231
|
+
except Exception as ex:
|
|
232
|
+
logger.info(f"Failed to extract license from setup.cfg {file_path}: {ex}")
|
|
233
|
+
return []
|
|
234
|
+
elif os.path.basename(file_path).lower() == 'setup.py':
|
|
235
|
+
try:
|
|
236
|
+
return get_licenses_from_setup_py(file_path)
|
|
237
|
+
except Exception as ex:
|
|
238
|
+
logger.info(f"Failed to extract license from setup.py {file_path}: {ex}")
|
|
239
|
+
return []
|
|
240
|
+
elif os.path.basename(file_path).lower().endswith('.podspec'):
|
|
241
|
+
try:
|
|
242
|
+
return get_licenses_from_podspec(file_path)
|
|
243
|
+
except Exception as ex:
|
|
244
|
+
logger.info(f"Failed to extract license from podspec {file_path}: {ex}")
|
|
245
|
+
return []
|
|
246
|
+
elif os.path.basename(file_path).lower() == 'cargo.toml':
|
|
247
|
+
try:
|
|
248
|
+
return get_licenses_from_cargo_toml(file_path)
|
|
249
|
+
except Exception as ex:
|
|
250
|
+
logger.info(f"Failed to extract license from Cargo.toml {file_path}: {ex}")
|
|
251
|
+
return []
|
fosslight_source/run_scancode.py
CHANGED
|
@@ -29,7 +29,8 @@ def run_scan(
|
|
|
29
29
|
return_results: bool = False, need_license: bool = False,
|
|
30
30
|
formats: list = [], called_by_cli: bool = False,
|
|
31
31
|
time_out: int = 120, correct_mode: bool = True,
|
|
32
|
-
correct_filepath: str = "", path_to_exclude: list = []
|
|
32
|
+
correct_filepath: str = "", path_to_exclude: list = [],
|
|
33
|
+
excluded_files: list = []
|
|
33
34
|
) -> Tuple[bool, str, list, list]:
|
|
34
35
|
if not called_by_cli:
|
|
35
36
|
global logger
|
|
@@ -90,39 +91,40 @@ def run_scan(
|
|
|
90
91
|
exclude_path_normalized = os.path.normpath(exclude_path).replace("\\", "/")
|
|
91
92
|
|
|
92
93
|
if exclude_path_normalized.endswith("/**"):
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
is_dir = os.path.isdir(full_exclude_path)
|
|
102
|
-
is_file = os.path.isfile(full_exclude_path)
|
|
103
|
-
if is_dir:
|
|
104
|
-
dir_name = os.path.basename(exclude_path_normalized.rstrip("/"))
|
|
105
|
-
base_path = exclude_path_normalized.rstrip("/")
|
|
106
|
-
|
|
107
|
-
if dir_name:
|
|
108
|
-
total_files_to_excluded.append(dir_name)
|
|
109
|
-
max_depth = 0
|
|
110
|
-
for root, dirs, files in os.walk(full_exclude_path):
|
|
111
|
-
depth = root[len(full_exclude_path):].count(os.sep)
|
|
112
|
-
max_depth = max(max_depth, depth)
|
|
113
|
-
for depth in range(1, max_depth + 2):
|
|
114
|
-
pattern = base_path + "/*" * depth
|
|
115
|
-
total_files_to_excluded.append(pattern)
|
|
94
|
+
base_dir = exclude_path_normalized[:-3].rstrip("/")
|
|
95
|
+
if base_dir:
|
|
96
|
+
full_exclude_path = os.path.join(abs_path_to_scan, base_dir)
|
|
97
|
+
if os.path.isdir(full_exclude_path):
|
|
98
|
+
total_files_to_excluded.append(base_dir)
|
|
99
|
+
total_files_to_excluded.append(exclude_path_normalized)
|
|
100
|
+
else:
|
|
101
|
+
total_files_to_excluded.append(exclude_path_normalized)
|
|
116
102
|
else:
|
|
117
103
|
total_files_to_excluded.append(exclude_path_normalized)
|
|
118
|
-
elif is_file:
|
|
119
|
-
total_files_to_excluded.append(exclude_path_normalized)
|
|
120
104
|
else:
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
105
|
+
has_glob_chars = any(char in exclude_path_normalized for char in ['*', '?', '['])
|
|
106
|
+
if not has_glob_chars:
|
|
107
|
+
full_exclude_path = os.path.join(abs_path_to_scan, exclude_path_normalized)
|
|
108
|
+
is_dir = os.path.isdir(full_exclude_path)
|
|
109
|
+
is_file = os.path.isfile(full_exclude_path)
|
|
110
|
+
else:
|
|
111
|
+
is_dir = False
|
|
112
|
+
is_file = False
|
|
113
|
+
|
|
114
|
+
if is_dir:
|
|
115
|
+
base_path = exclude_path_normalized.rstrip("/")
|
|
116
|
+
if base_path:
|
|
117
|
+
total_files_to_excluded.append(base_path)
|
|
118
|
+
total_files_to_excluded.append(f"{base_path}/**")
|
|
119
|
+
else:
|
|
120
|
+
total_files_to_excluded.append(exclude_path_normalized)
|
|
121
|
+
elif is_file:
|
|
122
|
+
total_files_to_excluded.append(f"**/{exclude_path_normalized}")
|
|
123
|
+
else:
|
|
124
|
+
total_files_to_excluded.append(exclude_path_normalized)
|
|
125
|
+
|
|
126
|
+
if excluded_files:
|
|
127
|
+
total_files_to_excluded.extend(f"**/{file_path}" for file_path in excluded_files)
|
|
126
128
|
|
|
127
129
|
total_files_to_excluded = sorted(list(set(total_files_to_excluded)))
|
|
128
130
|
ignore_tuple = tuple(total_files_to_excluded)
|
fosslight_source/run_scanoss.py
CHANGED
|
@@ -12,8 +12,8 @@ from datetime import datetime
|
|
|
12
12
|
import fosslight_util.constant as constant
|
|
13
13
|
from fosslight_util.set_log import init_log
|
|
14
14
|
from fosslight_util.output_format import check_output_formats_v2 # , write_output_file
|
|
15
|
-
from ._parsing_scanoss_file import
|
|
16
|
-
from ._parsing_scanoss_file import
|
|
15
|
+
from ._parsing_scanoss_file import parsing_scan_result # scanoss
|
|
16
|
+
from ._parsing_scanoss_file import parsing_extra_info # scanoss
|
|
17
17
|
import shutil
|
|
18
18
|
from pathlib import Path
|
|
19
19
|
from scanoss.scanner import Scanner, ScanType
|
|
@@ -28,11 +28,12 @@ SCANOSS_OUTPUT_FILE = "scanoss_raw_result.json"
|
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
def get_scanoss_extra_info(scanned_result: dict) -> list:
|
|
31
|
-
return
|
|
31
|
+
return parsing_extra_info(scanned_result)
|
|
32
32
|
|
|
33
33
|
|
|
34
|
-
def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list = [],
|
|
35
|
-
|
|
34
|
+
def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list = [],
|
|
35
|
+
called_by_cli: bool = False, write_json_file: bool = False, num_threads: int = -1,
|
|
36
|
+
path_to_exclude: list = [], excluded_files: set = None) -> list:
|
|
36
37
|
"""
|
|
37
38
|
Run scanoss.py for the given path.
|
|
38
39
|
|
|
@@ -72,7 +73,7 @@ def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list =
|
|
|
72
73
|
try:
|
|
73
74
|
scanner = Scanner(
|
|
74
75
|
ignore_cert_errors=True,
|
|
75
|
-
skip_folders=path_to_exclude,
|
|
76
|
+
skip_folders=list(path_to_exclude) if path_to_exclude else [],
|
|
76
77
|
scan_output=output_json_file,
|
|
77
78
|
scan_options=ScanType.SCAN_SNIPPETS.value,
|
|
78
79
|
nb_threads=num_threads if num_threads > 0 else 10
|
|
@@ -86,30 +87,16 @@ def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list =
|
|
|
86
87
|
logger.debug(f"{captured_output}")
|
|
87
88
|
|
|
88
89
|
if os.path.isfile(output_json_file):
|
|
89
|
-
total_files_to_excluded = []
|
|
90
|
-
if path_to_exclude:
|
|
91
|
-
for path in path_to_exclude:
|
|
92
|
-
path = os.path.join(path_to_scan, os.path.relpath(path, os.path.abspath(path_to_scan))) \
|
|
93
|
-
if not os.path.isabs(path_to_scan) and os.path.isabs(path) else os.path.join(path_to_scan, path)
|
|
94
|
-
if os.path.isdir(path):
|
|
95
|
-
for root, _, files in os.walk(path):
|
|
96
|
-
root = root[len(path_to_scan) + 1:]
|
|
97
|
-
total_files_to_excluded.extend([os.path.normpath(os.path.join(root, file)).replace('\\', '/')
|
|
98
|
-
for file in files])
|
|
99
|
-
elif os.path.isfile(path):
|
|
100
|
-
path = path[len(path_to_scan) + 1:]
|
|
101
|
-
total_files_to_excluded.append(os.path.normpath(path).replace('\\', '/'))
|
|
102
|
-
|
|
103
90
|
with open(output_json_file, "r") as st_json:
|
|
104
91
|
st_python = json.load(st_json)
|
|
105
|
-
for key_to_exclude in
|
|
92
|
+
for key_to_exclude in excluded_files:
|
|
106
93
|
if key_to_exclude in st_python:
|
|
107
94
|
del st_python[key_to_exclude]
|
|
108
95
|
with open(output_json_file, 'w') as st_json:
|
|
109
96
|
json.dump(st_python, st_json, indent=4)
|
|
110
97
|
with open(output_json_file, "r") as st_json:
|
|
111
98
|
st_python = json.load(st_json)
|
|
112
|
-
scanoss_file_list =
|
|
99
|
+
scanoss_file_list = parsing_scan_result(st_python, excluded_files)
|
|
113
100
|
|
|
114
101
|
except Exception as error:
|
|
115
102
|
logger.debug(f"SCANOSS Parsing {path_to_scan}: {error}")
|
|
@@ -12,39 +12,15 @@ import mmap
|
|
|
12
12
|
logger = logging.getLogger(constant.LOGGER_NAME)
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
def
|
|
16
|
-
|
|
17
|
-
abs_path_to_exclude = [os.path.abspath(os.path.join(path_to_scan, path)) for path in path_to_exclude]
|
|
18
|
-
for root, dirs, files in os.walk(path_to_scan):
|
|
19
|
-
for file in files:
|
|
20
|
-
file_path = os.path.join(root, file)
|
|
21
|
-
abs_file_path = os.path.abspath(file_path)
|
|
22
|
-
if any(os.path.commonpath([abs_file_path, exclude_path]) == exclude_path
|
|
23
|
-
for exclude_path in abs_path_to_exclude):
|
|
24
|
-
continue
|
|
25
|
-
file_list.append(file_path)
|
|
26
|
-
return file_list
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def get_spdx_downloads(path_to_scan: str, path_to_exclude: list = []) -> dict:
|
|
30
|
-
download_dict = {}
|
|
15
|
+
def get_spdx_downloads(file_path: str) -> list[str]:
    """Extract SPDX package download locations from a single file.

    Scans the file for ``SPDX-PackageDownloadLocation: <url>`` tags
    (case-insensitive) and returns the captured locations in file order.
    Returns an empty list for an empty file or on any error
    (mmap cannot map zero-length files, hence the size guard).
    """
    results = []
    find_word = re.compile(rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)", re.IGNORECASE)
    try:
        if os.path.getsize(file_path) > 0:
            # Open in binary mode: mmap exposes raw bytes and the regex is a
            # bytes pattern, so a text-mode handle only adds misleading
            # decoding semantics (and is fragile on some platforms).
            with open(file_path, "rb") as f:
                with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj:
                    for word in find_word.findall(mmap_obj):
                        results.append(word.decode('utf-8'))
    except Exception as ex:
        logger.warning(f"Failed to extract SPDX download location. {file_path}, {ex}")
    return results
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fosslight_source
|
|
3
|
-
Version: 2.2.
|
|
3
|
+
Version: 2.2.2
|
|
4
4
|
Summary: FOSSLight Source Scanner
|
|
5
5
|
Home-page: https://github.com/fosslight/fosslight_source_scanner
|
|
6
6
|
Download-URL: https://github.com/fosslight/fosslight_source_scanner
|
|
@@ -17,7 +17,7 @@ License-File: LICENSE
|
|
|
17
17
|
Requires-Dist: pyparsing
|
|
18
18
|
Requires-Dist: scanoss>=1.18.0
|
|
19
19
|
Requires-Dist: XlsxWriter
|
|
20
|
-
Requires-Dist: fosslight_util>=2.1.
|
|
20
|
+
Requires-Dist: fosslight_util>=2.1.34
|
|
21
21
|
Requires-Dist: PyYAML
|
|
22
22
|
Requires-Dist: wheel>=0.38.1
|
|
23
23
|
Requires-Dist: intbitset
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
fosslight_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
fosslight_source/_help.py,sha256=Ge6g9GKWGza11E74PFnBMqsj40UCUz-a_xArDZ1FClU,2316
|
|
3
|
+
fosslight_source/_license_matched.py,sha256=-3H881XQjFDafRttBsuboS3VbCPYEvPH1pwWXptknE4,2164
|
|
4
|
+
fosslight_source/_parsing_scancode_file_item.py,sha256=9TjCwTTPaytqTlZzCKzaX-n20xJLy346xwp0Ee-rWWA,14951
|
|
5
|
+
fosslight_source/_parsing_scanoss_file.py,sha256=L3iHqmQF2jeSpHYuYSre44doXKy-BoX0u1Lm2IfJSU8,3866
|
|
6
|
+
fosslight_source/_scan_item.py,sha256=rWoC-jMc6Hf_dpiwVdQjATNhkzRgLVn966q3UA1TAxc,9412
|
|
7
|
+
fosslight_source/cli.py,sha256=Ohsl9h4-9zS72pPQCf22Ij7LrUinhpioC_2DNro3bXQ,19385
|
|
8
|
+
fosslight_source/run_manifest_extractor.py,sha256=oaCdfXDp0bRqeEq0U3Yaf6fiZ-xjOOGsZBVbAeMN2zI,9105
|
|
9
|
+
fosslight_source/run_scancode.py,sha256=TFyNLV6P9rSBo9royDoG6az4l7Tkpl8Gr66IFK1DBU8,9021
|
|
10
|
+
fosslight_source/run_scanoss.py,sha256=_gdA4kOByI4saT4bDvMwIabpxtpH4f_yruHdBtb_g-o,4852
|
|
11
|
+
fosslight_source/run_spdx_extractor.py,sha256=wIdjDIpzglc2tfrh_YodkAQ0QqfgfmJBSuE2bf4w4Tg,862
|
|
12
|
+
fosslight_source-2.2.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
13
|
+
fosslight_source-2.2.2.dist-info/METADATA,sha256=Jw2ERDhSs7sfNrQuV3bLT5vnC4fMDYGV-EFmTak_yfc,3557
|
|
14
|
+
fosslight_source-2.2.2.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
|
|
15
|
+
fosslight_source-2.2.2.dist-info/entry_points.txt,sha256=G4bBRWqSrJ68g-2M-JtNDrSZsdym_M7_KohQ2qR1vG8,113
|
|
16
|
+
fosslight_source-2.2.2.dist-info/top_level.txt,sha256=C2vw-0OIent84Vq-UEk1gt_kK1EL8dIItzBzp3WNyA4,17
|
|
17
|
+
fosslight_source-2.2.2.dist-info/RECORD,,
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
fosslight_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
fosslight_source/_help.py,sha256=Ge6g9GKWGza11E74PFnBMqsj40UCUz-a_xArDZ1FClU,2316
|
|
3
|
-
fosslight_source/_license_matched.py,sha256=-3H881XQjFDafRttBsuboS3VbCPYEvPH1pwWXptknE4,2164
|
|
4
|
-
fosslight_source/_parsing_scancode_file_item.py,sha256=JjFm1rYzFnV7VOO0lyY32qfID9UlOwNCUfhxABgG6Ng,20692
|
|
5
|
-
fosslight_source/_parsing_scanoss_file.py,sha256=0f5JzjnFU-kcPZRX7OKnextyvANjKwwNZeyCJVC7eME,4624
|
|
6
|
-
fosslight_source/_scan_item.py,sha256=mPNdVdVagiPI4YlL0Nu656nU5yvWTdZTKR2SxXA8l1g,12612
|
|
7
|
-
fosslight_source/cli.py,sha256=x3z8NuiU7hAX8rWHqeEyAtMn5Rtw6dGk5UjvGGfIjl8,17902
|
|
8
|
-
fosslight_source/run_scancode.py,sha256=BcMzUEoHX4ukvj9Z2mZX-6HaY2yKk_AvQHeDzS6REPg,8934
|
|
9
|
-
fosslight_source/run_scanoss.py,sha256=8wu3sa-YBqjfb5x2dbDJuAdw3rrExueOW23WdzqDCaU,5721
|
|
10
|
-
fosslight_source/run_spdx_extractor.py,sha256=Hr9sTv06cJaVITy8amwexIW2FV8_rUcFw6hKmR9ZYws,1990
|
|
11
|
-
fosslight_source-2.2.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
12
|
-
fosslight_source-2.2.0.dist-info/METADATA,sha256=1vWU_HiBNhGVBpjym9Mz8jKf8opyCXZRPAHg1qzUy3E,3557
|
|
13
|
-
fosslight_source-2.2.0.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
|
|
14
|
-
fosslight_source-2.2.0.dist-info/entry_points.txt,sha256=G4bBRWqSrJ68g-2M-JtNDrSZsdym_M7_KohQ2qR1vG8,113
|
|
15
|
-
fosslight_source-2.2.0.dist-info/top_level.txt,sha256=C2vw-0OIent84Vq-UEk1gt_kK1EL8dIItzBzp3WNyA4,17
|
|
16
|
-
fosslight_source-2.2.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|