fosslight-source 2.2.10__tar.gz → 2.2.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fosslight_source-2.2.10/src/fosslight_source.egg-info → fosslight_source-2.2.11}/PKG-INFO +1 -1
- {fosslight_source-2.2.10 → fosslight_source-2.2.11}/setup.py +1 -1
- {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/_scan_item.py +5 -1
- {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/cli.py +100 -40
- {fosslight_source-2.2.10 → fosslight_source-2.2.11/src/fosslight_source.egg-info}/PKG-INFO +1 -1
- {fosslight_source-2.2.10 → fosslight_source-2.2.11}/LICENSE +0 -0
- {fosslight_source-2.2.10 → fosslight_source-2.2.11}/MANIFEST.in +0 -0
- {fosslight_source-2.2.10 → fosslight_source-2.2.11}/README.md +0 -0
- {fosslight_source-2.2.10 → fosslight_source-2.2.11}/requirements.txt +0 -0
- {fosslight_source-2.2.10 → fosslight_source-2.2.11}/setup.cfg +0 -0
- {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/__init__.py +0 -0
- {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/_help.py +0 -0
- {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/_license_matched.py +0 -0
- {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/_parsing_scancode_file_item.py +0 -0
- {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/_parsing_scanoss_file.py +0 -0
- {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/run_manifest_extractor.py +0 -0
- {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/run_scancode.py +0 -0
- {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/run_scanoss.py +0 -0
- {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/run_spdx_extractor.py +0 -0
- {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source.egg-info/SOURCES.txt +0 -0
- {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source.egg-info/dependency_links.txt +0 -0
- {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source.egg-info/entry_points.txt +0 -0
- {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source.egg-info/requires.txt +0 -0
- {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source.egg-info/top_level.txt +0 -0
- {fosslight_source-2.2.10 → fosslight_source-2.2.11}/tests/test_tox.py +0 -0
|
@@ -14,7 +14,7 @@ with open('requirements.txt', 'r', 'utf-8') as f:
|
|
|
14
14
|
if __name__ == "__main__":
|
|
15
15
|
setup(
|
|
16
16
|
name='fosslight_source',
|
|
17
|
-
version='2.2.10',
|
|
17
|
+
version='2.2.11',
|
|
18
18
|
package_dir={"": "src"},
|
|
19
19
|
packages=find_packages(where='src'),
|
|
20
20
|
description='FOSSLight Source Scanner',
|
|
@@ -44,6 +44,8 @@ class SourceItem(FileItem):
|
|
|
44
44
|
self.oss_version = ""
|
|
45
45
|
|
|
46
46
|
self.checksum = get_checksum_sha1(value)
|
|
47
|
+
self.kb_origin_url = "" # URL from OSS KB (_get_origin_url_from_md5_hash)
|
|
48
|
+
self.kb_evidence = "" # Evidence from KB API (exact_match or code snippet)
|
|
47
49
|
|
|
48
50
|
def __del__(self) -> None:
|
|
49
51
|
pass
|
|
@@ -104,6 +106,7 @@ class SourceItem(FileItem):
|
|
|
104
106
|
return md5_hex, wfp
|
|
105
107
|
|
|
106
108
|
def _get_origin_url_from_md5_hash(self, md5_hash: str, wfp: str = "") -> str:
|
|
109
|
+
"""Return origin_url from KB API."""
|
|
107
110
|
try:
|
|
108
111
|
payload = {"file_hash": md5_hash}
|
|
109
112
|
if wfp and wfp.strip():
|
|
@@ -115,7 +118,6 @@ class SourceItem(FileItem):
|
|
|
115
118
|
with urllib.request.urlopen(request, timeout=10) as response:
|
|
116
119
|
data = json.loads(response.read().decode())
|
|
117
120
|
if isinstance(data, dict):
|
|
118
|
-
# Only extract output if return_code is 0 (success)
|
|
119
121
|
return_code = data.get('return_code', -1)
|
|
120
122
|
if return_code == 0:
|
|
121
123
|
output = data.get('output', '')
|
|
@@ -183,6 +185,8 @@ class SourceItem(FileItem):
|
|
|
183
185
|
if md5_hash:
|
|
184
186
|
origin_url = self._get_origin_url_from_md5_hash(md5_hash, wfp)
|
|
185
187
|
if origin_url:
|
|
188
|
+
self.kb_origin_url = origin_url
|
|
189
|
+
self.kb_evidence = "exact_match"
|
|
186
190
|
extracted_name, extracted_version, repo_url = self._extract_oss_info_from_url(origin_url)
|
|
187
191
|
if extracted_name:
|
|
188
192
|
self.oss_name = extracted_name
|
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
import sys
|
|
7
7
|
import os
|
|
8
8
|
import platform
|
|
9
|
+
import time
|
|
9
10
|
import warnings
|
|
10
11
|
import logging
|
|
11
12
|
import urllib.request
|
|
@@ -40,7 +41,9 @@ SCANOSS_HEADER = {SRC_SHEET_NAME: ['ID', 'Source Path', 'OSS Name',
|
|
|
40
41
|
MERGED_HEADER = {SRC_SHEET_NAME: ['ID', 'Source Path', 'OSS Name',
|
|
41
42
|
'OSS Version', 'License', 'Download Location',
|
|
42
43
|
'Homepage', 'Copyright Text', 'Exclude', 'Comment', 'license_reference']}
|
|
43
|
-
|
|
44
|
+
KB_REFERENCE_HEADER = ['ID', 'Source Path', 'KB Origin URL', 'Evidence']
|
|
45
|
+
ALL_MODE = 'all'
|
|
46
|
+
SCANNER_TYPE = ['kb', 'scancode', 'scanoss', ALL_MODE]
|
|
44
47
|
|
|
45
48
|
|
|
46
49
|
logger = logging.getLogger(constant.LOGGER_NAME)
|
|
@@ -72,7 +75,7 @@ def main() -> None:
|
|
|
72
75
|
parser.add_argument('-o', '--output', nargs=1, type=str, required=False, default="")
|
|
73
76
|
parser.add_argument('-m', '--matched', action='store_true', required=False)
|
|
74
77
|
parser.add_argument('-f', '--formats', nargs='*', type=str, required=False)
|
|
75
|
-
parser.add_argument('-s', '--scanner', nargs=1, type=str, required=False, default='all')
|
|
78
|
+
parser.add_argument('-s', '--scanner', nargs=1, type=str, required=False, default=ALL_MODE)
|
|
76
79
|
parser.add_argument('-t', '--timeout', type=int, required=False, default=120)
|
|
77
80
|
parser.add_argument('-c', '--cores', type=int, required=False, default=-1)
|
|
78
81
|
parser.add_argument('-e', '--exclude', nargs='*', required=False, default=[])
|
|
@@ -137,7 +140,8 @@ def create_report_file(
|
|
|
137
140
|
output_path: str = "", output_files: list = [],
|
|
138
141
|
output_extensions: list = [], correct_mode: bool = True,
|
|
139
142
|
correct_filepath: str = "", path_to_scan: str = "", path_to_exclude: list = [],
|
|
140
|
-
formats: list = [], api_limit_exceed: bool = False, files_count: int = 0, final_output_path: str = ""
|
|
143
|
+
formats: list = [], api_limit_exceed: bool = False, files_count: int = 0, final_output_path: str = "",
|
|
144
|
+
run_kb_msg: str = ""
|
|
141
145
|
) -> 'ScannerItem':
|
|
142
146
|
"""
|
|
143
147
|
Create report files for given scanned result.
|
|
@@ -206,14 +210,11 @@ def create_report_file(
|
|
|
206
210
|
if api_limit_exceed:
|
|
207
211
|
scan_item.set_cover_comment("SCANOSS skipped (API limits)")
|
|
208
212
|
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
scan_item.set_cover_comment("KB Enabled" if check_kb_server_reachable() else "KB Unreachable")
|
|
213
|
+
if run_kb_msg:
|
|
214
|
+
scan_item.set_cover_comment(run_kb_msg)
|
|
212
215
|
display_mode = selected_scanner
|
|
213
|
-
if selected_scanner ==
|
|
214
|
-
display_mode
|
|
215
|
-
elif selected_scanner == "all":
|
|
216
|
-
display_mode = "scancode, scanoss"
|
|
216
|
+
if selected_scanner == ALL_MODE:
|
|
217
|
+
display_mode = ", ".join([s for s in SCANNER_TYPE if s != ALL_MODE])
|
|
217
218
|
scan_item.set_cover_comment(f"Mode : {display_mode}")
|
|
218
219
|
|
|
219
220
|
if merged_result:
|
|
@@ -230,11 +231,17 @@ def create_report_file(
|
|
|
230
231
|
sheet_list["scancode_reference"] = get_license_list_to_print(license_list)
|
|
231
232
|
elif selected_scanner == 'scanoss':
|
|
232
233
|
sheet_list["scanoss_reference"] = get_scanoss_extra_info(scanoss_result)
|
|
234
|
+
elif selected_scanner == 'kb':
|
|
235
|
+
kb_ref = get_kb_reference_to_print(merged_result)
|
|
236
|
+
sheet_list["kb_reference"] = kb_ref
|
|
233
237
|
else:
|
|
234
238
|
sheet_list["scancode_reference"] = get_license_list_to_print(license_list)
|
|
235
239
|
sheet_list["scanoss_reference"] = get_scanoss_extra_info(scanoss_result)
|
|
236
|
-
|
|
237
|
-
|
|
240
|
+
kb_ref = get_kb_reference_to_print(merged_result)
|
|
241
|
+
sheet_list["kb_reference"] = kb_ref
|
|
242
|
+
|
|
243
|
+
if sheet_list:
|
|
244
|
+
scan_item.external_sheets = sheet_list
|
|
238
245
|
|
|
239
246
|
if correct_mode:
|
|
240
247
|
success, msg_correct, correct_item = correct_with_yaml(correct_filepath, path_to_scan, scan_item)
|
|
@@ -262,25 +269,56 @@ def create_report_file(
|
|
|
262
269
|
|
|
263
270
|
|
|
264
271
|
def check_kb_server_reachable() -> bool:
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
272
|
+
for attempt in range(3):
|
|
273
|
+
try:
|
|
274
|
+
request = urllib.request.Request(f"{KB_URL}health", method='GET')
|
|
275
|
+
with urllib.request.urlopen(request, timeout=10) as response:
|
|
276
|
+
logger.debug(f"KB server is reachable. Response status: {response.status}")
|
|
277
|
+
return True
|
|
278
|
+
except urllib.error.HTTPError:
|
|
279
|
+
logger.debug("KB server responded (HTTP error), considered reachable")
|
|
280
|
+
return True
|
|
281
|
+
except urllib.error.URLError as e:
|
|
282
|
+
logger.debug(f"KB server is unreachable (timeout or connection error): {e}")
|
|
283
|
+
if attempt < 2:
|
|
284
|
+
time.sleep(1)
|
|
285
|
+
else:
|
|
286
|
+
return False
|
|
287
|
+
except Exception as e:
|
|
288
|
+
logger.debug(f"Unexpected error checking KB server: {e}")
|
|
289
|
+
if attempt < 2:
|
|
290
|
+
time.sleep(1)
|
|
291
|
+
else:
|
|
292
|
+
return False
|
|
293
|
+
return False
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def get_kb_reference_to_print(merged_result: list) -> list:
|
|
297
|
+
"""
|
|
298
|
+
Build kb_reference sheet rows: file path and URL from _get_origin_url_from_md5_hash.
|
|
299
|
+
:param merged_result: list of SourceItem (merged scan result).
|
|
300
|
+
:return: list of rows, first row is header, rest are [source_path, kb_origin_url].
|
|
301
|
+
"""
|
|
302
|
+
rows = [item for item in merged_result if getattr(item, 'kb_origin_url', None)]
|
|
303
|
+
if not rows:
|
|
304
|
+
return [KB_REFERENCE_HEADER]
|
|
305
|
+
rows.sort(key=lambda x: x.source_name_or_path)
|
|
306
|
+
data = [
|
|
307
|
+
[
|
|
308
|
+
item.source_name_or_path,
|
|
309
|
+
item.kb_origin_url,
|
|
310
|
+
str(getattr(item, 'kb_evidence', '') or '')
|
|
311
|
+
]
|
|
312
|
+
for item in rows
|
|
313
|
+
]
|
|
314
|
+
data.insert(0, KB_REFERENCE_HEADER)
|
|
315
|
+
return data
|
|
279
316
|
|
|
280
317
|
|
|
281
318
|
def merge_results(
|
|
282
319
|
scancode_result: list = [], scanoss_result: list = [], spdx_downloads: dict = {},
|
|
283
|
-
path_to_scan: str = "", run_kb: bool = False, manifest_licenses: dict = {}
|
|
320
|
+
path_to_scan: str = "", run_kb: bool = False, manifest_licenses: dict = {},
|
|
321
|
+
excluded_files: set = None
|
|
284
322
|
) -> list:
|
|
285
323
|
|
|
286
324
|
"""
|
|
@@ -290,8 +328,11 @@ def merge_results(
|
|
|
290
328
|
:param spdx_downloads: dictionary of spdx parsed results.
|
|
291
329
|
:param path_to_scan: path to the scanned directory for constructing absolute file paths.
|
|
292
330
|
:param run_kb: if True, load kb result.
|
|
331
|
+
:param excluded_files: set of relative paths to exclude from KB-only file discovery.
|
|
293
332
|
:return merged_result: list of merged result in SourceItem.
|
|
294
333
|
"""
|
|
334
|
+
if excluded_files is None:
|
|
335
|
+
excluded_files = set()
|
|
295
336
|
|
|
296
337
|
scancode_result.extend([item for item in scanoss_result if item not in scancode_result])
|
|
297
338
|
|
|
@@ -319,16 +360,27 @@ def merge_results(
|
|
|
319
360
|
new_result_item.licenses = licenses
|
|
320
361
|
new_result_item.is_manifest_file = True
|
|
321
362
|
scancode_result.append(new_result_item)
|
|
322
|
-
if run_kb and not check_kb_server_reachable():
|
|
323
|
-
run_kb = False
|
|
324
|
-
if run_kb:
|
|
325
|
-
logger.info("KB server is reachable. Loading data from OSS KB.")
|
|
326
|
-
else:
|
|
327
|
-
logger.info("Skipping KB lookup.")
|
|
328
363
|
|
|
329
364
|
for item in scancode_result:
|
|
330
365
|
item.set_oss_item(path_to_scan, run_kb)
|
|
331
366
|
|
|
367
|
+
# Add OSSItem for files in path_to_scan that are not in scancode_result
|
|
368
|
+
# when KB returns an origin URL for their MD5 hash (skip excluded_files)
|
|
369
|
+
if run_kb:
|
|
370
|
+
abs_path_to_scan = os.path.abspath(path_to_scan)
|
|
371
|
+
scancode_paths = {item.source_name_or_path for item in scancode_result}
|
|
372
|
+
for root, _dirs, files in os.walk(path_to_scan):
|
|
373
|
+
for file in files:
|
|
374
|
+
file_path = os.path.join(root, file)
|
|
375
|
+
rel_path = os.path.relpath(file_path, abs_path_to_scan).replace("\\", "/")
|
|
376
|
+
if rel_path in scancode_paths or rel_path in excluded_files:
|
|
377
|
+
continue
|
|
378
|
+
extra_item = SourceItem(rel_path)
|
|
379
|
+
extra_item.set_oss_item(path_to_scan, run_kb)
|
|
380
|
+
if extra_item.download_location:
|
|
381
|
+
scancode_result.append(extra_item)
|
|
382
|
+
scancode_paths.add(rel_path)
|
|
383
|
+
|
|
332
384
|
return scancode_result
|
|
333
385
|
|
|
334
386
|
|
|
@@ -338,7 +390,7 @@ def run_scanners(
|
|
|
338
390
|
called_by_cli: bool = True, print_matched_text: bool = False,
|
|
339
391
|
formats: list = [], time_out: int = 120,
|
|
340
392
|
correct_mode: bool = True, correct_filepath: str = "",
|
|
341
|
-
selected_scanner: str = 'all', path_to_exclude: list = [],
|
|
393
|
+
selected_scanner: str = ALL_MODE, path_to_exclude: list = [],
|
|
342
394
|
all_exclude_mode: tuple = ()
|
|
343
395
|
) -> Tuple[bool, str, 'ScannerItem', list, list]:
|
|
344
396
|
"""
|
|
@@ -397,8 +449,8 @@ def run_scanners(
|
|
|
397
449
|
logger.debug(f"Skipped paths: {excluded_path_with_default_exclusion}")
|
|
398
450
|
|
|
399
451
|
if not selected_scanner:
|
|
400
|
-
selected_scanner = 'all'
|
|
401
|
-
if selected_scanner in ['scancode', 'all']:
|
|
452
|
+
selected_scanner = ALL_MODE
|
|
453
|
+
if selected_scanner in ['scancode', ALL_MODE]:
|
|
402
454
|
success, result_log[RESULT_KEY], scancode_result, license_list = run_scan(path_to_scan, output_file_name,
|
|
403
455
|
write_json_file, num_cores, True,
|
|
404
456
|
print_matched_text, formats, called_by_cli,
|
|
@@ -406,20 +458,28 @@ def run_scanners(
|
|
|
406
458
|
excluded_path_with_default_exclusion,
|
|
407
459
|
excluded_files)
|
|
408
460
|
excluded_files = set(excluded_files) if excluded_files else set()
|
|
409
|
-
if selected_scanner in ['scanoss', 'all']:
|
|
461
|
+
if selected_scanner in ['scanoss', ALL_MODE]:
|
|
410
462
|
scanoss_result, api_limit_exceed = run_scanoss_py(path_to_scan, output_path, formats, True, num_cores,
|
|
411
463
|
excluded_path_with_default_exclusion, excluded_files,
|
|
412
464
|
write_json_file)
|
|
413
465
|
|
|
466
|
+
run_kb_msg = ""
|
|
414
467
|
if selected_scanner in SCANNER_TYPE:
|
|
415
|
-
run_kb = True if selected_scanner in ['kb'] else False
|
|
468
|
+
run_kb = True if selected_scanner in ['kb', ALL_MODE] else False
|
|
469
|
+
if run_kb:
|
|
470
|
+
if not check_kb_server_reachable():
|
|
471
|
+
run_kb = False
|
|
472
|
+
run_kb_msg = "KB Unreachable"
|
|
473
|
+
else:
|
|
474
|
+
run_kb_msg = "KB Enabled"
|
|
475
|
+
|
|
416
476
|
spdx_downloads, manifest_licenses = metadata_collector(path_to_scan, excluded_files)
|
|
417
477
|
merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads,
|
|
418
|
-
path_to_scan, run_kb, manifest_licenses)
|
|
478
|
+
path_to_scan, run_kb, manifest_licenses, excluded_files)
|
|
419
479
|
scan_item = create_report_file(start_time, merged_result, license_list, scanoss_result, selected_scanner,
|
|
420
480
|
print_matched_text, output_path, output_files, output_extensions, correct_mode,
|
|
421
481
|
correct_filepath, path_to_scan, excluded_path_without_dot, formats,
|
|
422
|
-
api_limit_exceed, cnt_file_except_skipped, final_output_path)
|
|
482
|
+
api_limit_exceed, cnt_file_except_skipped, final_output_path, run_kb_msg)
|
|
423
483
|
else:
|
|
424
484
|
print_help_msg_source_scanner()
|
|
425
485
|
result_log[RESULT_KEY] = "Unsupported scanner"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/_license_matched.py
RENAMED
|
File without changes
|
|
File without changes
|
{fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/_parsing_scanoss_file.py
RENAMED
|
File without changes
|
{fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/run_manifest_extractor.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/run_spdx_extractor.py
RENAMED
|
File without changes
|
{fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source.egg-info/requires.txt
RENAMED
|
File without changes
|
{fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|