fosslight-source 2.2.10__tar.gz → 2.2.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {fosslight_source-2.2.10/src/fosslight_source.egg-info → fosslight_source-2.2.11}/PKG-INFO +1 -1
  2. {fosslight_source-2.2.10 → fosslight_source-2.2.11}/setup.py +1 -1
  3. {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/_scan_item.py +5 -1
  4. {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/cli.py +100 -40
  5. {fosslight_source-2.2.10 → fosslight_source-2.2.11/src/fosslight_source.egg-info}/PKG-INFO +1 -1
  6. {fosslight_source-2.2.10 → fosslight_source-2.2.11}/LICENSE +0 -0
  7. {fosslight_source-2.2.10 → fosslight_source-2.2.11}/MANIFEST.in +0 -0
  8. {fosslight_source-2.2.10 → fosslight_source-2.2.11}/README.md +0 -0
  9. {fosslight_source-2.2.10 → fosslight_source-2.2.11}/requirements.txt +0 -0
  10. {fosslight_source-2.2.10 → fosslight_source-2.2.11}/setup.cfg +0 -0
  11. {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/__init__.py +0 -0
  12. {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/_help.py +0 -0
  13. {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/_license_matched.py +0 -0
  14. {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/_parsing_scancode_file_item.py +0 -0
  15. {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/_parsing_scanoss_file.py +0 -0
  16. {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/run_manifest_extractor.py +0 -0
  17. {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/run_scancode.py +0 -0
  18. {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/run_scanoss.py +0 -0
  19. {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source/run_spdx_extractor.py +0 -0
  20. {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source.egg-info/SOURCES.txt +0 -0
  21. {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source.egg-info/dependency_links.txt +0 -0
  22. {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source.egg-info/entry_points.txt +0 -0
  23. {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source.egg-info/requires.txt +0 -0
  24. {fosslight_source-2.2.10 → fosslight_source-2.2.11}/src/fosslight_source.egg-info/top_level.txt +0 -0
  25. {fosslight_source-2.2.10 → fosslight_source-2.2.11}/tests/test_tox.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fosslight_source
3
- Version: 2.2.10
3
+ Version: 2.2.11
4
4
  Summary: FOSSLight Source Scanner
5
5
  Home-page: https://github.com/fosslight/fosslight_source_scanner
6
6
  Download-URL: https://github.com/fosslight/fosslight_source_scanner
@@ -14,7 +14,7 @@ with open('requirements.txt', 'r', 'utf-8') as f:
14
14
  if __name__ == "__main__":
15
15
  setup(
16
16
  name='fosslight_source',
17
- version='2.2.10',
17
+ version='2.2.11',
18
18
  package_dir={"": "src"},
19
19
  packages=find_packages(where='src'),
20
20
  description='FOSSLight Source Scanner',
@@ -44,6 +44,8 @@ class SourceItem(FileItem):
44
44
  self.oss_version = ""
45
45
 
46
46
  self.checksum = get_checksum_sha1(value)
47
+ self.kb_origin_url = "" # URL from OSS KB (_get_origin_url_from_md5_hash)
48
+ self.kb_evidence = "" # Evidence from KB API (exact_match or code snippet)
47
49
 
48
50
  def __del__(self) -> None:
49
51
  pass
@@ -104,6 +106,7 @@ class SourceItem(FileItem):
104
106
  return md5_hex, wfp
105
107
 
106
108
  def _get_origin_url_from_md5_hash(self, md5_hash: str, wfp: str = "") -> str:
109
+ """Return origin_url from KB API."""
107
110
  try:
108
111
  payload = {"file_hash": md5_hash}
109
112
  if wfp and wfp.strip():
@@ -115,7 +118,6 @@ class SourceItem(FileItem):
115
118
  with urllib.request.urlopen(request, timeout=10) as response:
116
119
  data = json.loads(response.read().decode())
117
120
  if isinstance(data, dict):
118
- # Only extract output if return_code is 0 (success)
119
121
  return_code = data.get('return_code', -1)
120
122
  if return_code == 0:
121
123
  output = data.get('output', '')
@@ -183,6 +185,8 @@ class SourceItem(FileItem):
183
185
  if md5_hash:
184
186
  origin_url = self._get_origin_url_from_md5_hash(md5_hash, wfp)
185
187
  if origin_url:
188
+ self.kb_origin_url = origin_url
189
+ self.kb_evidence = "exact_match"
186
190
  extracted_name, extracted_version, repo_url = self._extract_oss_info_from_url(origin_url)
187
191
  if extracted_name:
188
192
  self.oss_name = extracted_name
@@ -6,6 +6,7 @@
6
6
  import sys
7
7
  import os
8
8
  import platform
9
+ import time
9
10
  import warnings
10
11
  import logging
11
12
  import urllib.request
@@ -40,7 +41,9 @@ SCANOSS_HEADER = {SRC_SHEET_NAME: ['ID', 'Source Path', 'OSS Name',
40
41
  MERGED_HEADER = {SRC_SHEET_NAME: ['ID', 'Source Path', 'OSS Name',
41
42
  'OSS Version', 'License', 'Download Location',
42
43
  'Homepage', 'Copyright Text', 'Exclude', 'Comment', 'license_reference']}
43
- SCANNER_TYPE = ['kb', 'scancode', 'scanoss', 'all']
44
+ KB_REFERENCE_HEADER = ['ID', 'Source Path', 'KB Origin URL', 'Evidence']
45
+ ALL_MODE = 'all'
46
+ SCANNER_TYPE = ['kb', 'scancode', 'scanoss', ALL_MODE]
44
47
 
45
48
 
46
49
  logger = logging.getLogger(constant.LOGGER_NAME)
@@ -72,7 +75,7 @@ def main() -> None:
72
75
  parser.add_argument('-o', '--output', nargs=1, type=str, required=False, default="")
73
76
  parser.add_argument('-m', '--matched', action='store_true', required=False)
74
77
  parser.add_argument('-f', '--formats', nargs='*', type=str, required=False)
75
- parser.add_argument('-s', '--scanner', nargs=1, type=str, required=False, default='all')
78
+ parser.add_argument('-s', '--scanner', nargs=1, type=str, required=False, default=ALL_MODE)
76
79
  parser.add_argument('-t', '--timeout', type=int, required=False, default=120)
77
80
  parser.add_argument('-c', '--cores', type=int, required=False, default=-1)
78
81
  parser.add_argument('-e', '--exclude', nargs='*', required=False, default=[])
@@ -137,7 +140,8 @@ def create_report_file(
137
140
  output_path: str = "", output_files: list = [],
138
141
  output_extensions: list = [], correct_mode: bool = True,
139
142
  correct_filepath: str = "", path_to_scan: str = "", path_to_exclude: list = [],
140
- formats: list = [], api_limit_exceed: bool = False, files_count: int = 0, final_output_path: str = ""
143
+ formats: list = [], api_limit_exceed: bool = False, files_count: int = 0, final_output_path: str = "",
144
+ run_kb_msg: str = ""
141
145
  ) -> 'ScannerItem':
142
146
  """
143
147
  Create report files for given scanned result.
@@ -206,14 +210,11 @@ def create_report_file(
206
210
  if api_limit_exceed:
207
211
  scan_item.set_cover_comment("SCANOSS skipped (API limits)")
208
212
 
209
- run_kb = True if selected_scanner in ['kb'] else False
210
- if run_kb:
211
- scan_item.set_cover_comment("KB Enabled" if check_kb_server_reachable() else "KB Unreachable")
213
+ if run_kb_msg:
214
+ scan_item.set_cover_comment(run_kb_msg)
212
215
  display_mode = selected_scanner
213
- if selected_scanner == "kb":
214
- display_mode += ", scancode"
215
- elif selected_scanner == "all":
216
- display_mode = "scancode, scanoss"
216
+ if selected_scanner == ALL_MODE:
217
+ display_mode = ", ".join([s for s in SCANNER_TYPE if s != ALL_MODE])
217
218
  scan_item.set_cover_comment(f"Mode : {display_mode}")
218
219
 
219
220
  if merged_result:
@@ -230,11 +231,17 @@ def create_report_file(
230
231
  sheet_list["scancode_reference"] = get_license_list_to_print(license_list)
231
232
  elif selected_scanner == 'scanoss':
232
233
  sheet_list["scanoss_reference"] = get_scanoss_extra_info(scanoss_result)
234
+ elif selected_scanner == 'kb':
235
+ kb_ref = get_kb_reference_to_print(merged_result)
236
+ sheet_list["kb_reference"] = kb_ref
233
237
  else:
234
238
  sheet_list["scancode_reference"] = get_license_list_to_print(license_list)
235
239
  sheet_list["scanoss_reference"] = get_scanoss_extra_info(scanoss_result)
236
- if sheet_list:
237
- scan_item.external_sheets = sheet_list
240
+ kb_ref = get_kb_reference_to_print(merged_result)
241
+ sheet_list["kb_reference"] = kb_ref
242
+
243
+ if sheet_list:
244
+ scan_item.external_sheets = sheet_list
238
245
 
239
246
  if correct_mode:
240
247
  success, msg_correct, correct_item = correct_with_yaml(correct_filepath, path_to_scan, scan_item)
@@ -262,25 +269,56 @@ def create_report_file(
262
269
 
263
270
 
264
271
  def check_kb_server_reachable() -> bool:
265
- try:
266
- request = urllib.request.Request(f"{KB_URL}health", method='GET')
267
- with urllib.request.urlopen(request, timeout=10) as response:
268
- logger.debug(f"KB server is reachable. Response status: {response.status}")
269
- return response.status != 404
270
- except urllib.error.HTTPError as e:
271
- logger.debug(f"KB server returned HTTP error: {e.code}")
272
- return e.code != 404
273
- except urllib.error.URLError as e:
274
- logger.debug(f"KB server is unreachable (timeout or connection error): {e}")
275
- return False
276
- except Exception as e:
277
- logger.debug(f"Unexpected error checking KB server: {e}")
278
- return False
272
+ for attempt in range(3):
273
+ try:
274
+ request = urllib.request.Request(f"{KB_URL}health", method='GET')
275
+ with urllib.request.urlopen(request, timeout=10) as response:
276
+ logger.debug(f"KB server is reachable. Response status: {response.status}")
277
+ return True
278
+ except urllib.error.HTTPError:
279
+ logger.debug("KB server responded (HTTP error), considered reachable")
280
+ return True
281
+ except urllib.error.URLError as e:
282
+ logger.debug(f"KB server is unreachable (timeout or connection error): {e}")
283
+ if attempt < 2:
284
+ time.sleep(1)
285
+ else:
286
+ return False
287
+ except Exception as e:
288
+ logger.debug(f"Unexpected error checking KB server: {e}")
289
+ if attempt < 2:
290
+ time.sleep(1)
291
+ else:
292
+ return False
293
+ return False
294
+
295
+
296
+ def get_kb_reference_to_print(merged_result: list) -> list:
297
+ """
298
+ Build kb_reference sheet rows: file path and URL from _get_origin_url_from_md5_hash.
299
+ :param merged_result: list of SourceItem (merged scan result).
300
+ :return: list of rows, first row is header, rest are [source_path, kb_origin_url].
301
+ """
302
+ rows = [item for item in merged_result if getattr(item, 'kb_origin_url', None)]
303
+ if not rows:
304
+ return [KB_REFERENCE_HEADER]
305
+ rows.sort(key=lambda x: x.source_name_or_path)
306
+ data = [
307
+ [
308
+ item.source_name_or_path,
309
+ item.kb_origin_url,
310
+ str(getattr(item, 'kb_evidence', '') or '')
311
+ ]
312
+ for item in rows
313
+ ]
314
+ data.insert(0, KB_REFERENCE_HEADER)
315
+ return data
279
316
 
280
317
 
281
318
  def merge_results(
282
319
  scancode_result: list = [], scanoss_result: list = [], spdx_downloads: dict = {},
283
- path_to_scan: str = "", run_kb: bool = False, manifest_licenses: dict = {}
320
+ path_to_scan: str = "", run_kb: bool = False, manifest_licenses: dict = {},
321
+ excluded_files: set = None
284
322
  ) -> list:
285
323
 
286
324
  """
@@ -290,8 +328,11 @@ def merge_results(
290
328
  :param spdx_downloads: dictionary of spdx parsed results.
291
329
  :param path_to_scan: path to the scanned directory for constructing absolute file paths.
292
330
  :param run_kb: if True, load kb result.
331
+ :param excluded_files: set of relative paths to exclude from KB-only file discovery.
293
332
  :return merged_result: list of merged result in SourceItem.
294
333
  """
334
+ if excluded_files is None:
335
+ excluded_files = set()
295
336
 
296
337
  scancode_result.extend([item for item in scanoss_result if item not in scancode_result])
297
338
 
@@ -319,16 +360,27 @@ def merge_results(
319
360
  new_result_item.licenses = licenses
320
361
  new_result_item.is_manifest_file = True
321
362
  scancode_result.append(new_result_item)
322
- if run_kb and not check_kb_server_reachable():
323
- run_kb = False
324
- if run_kb:
325
- logger.info("KB server is reachable. Loading data from OSS KB.")
326
- else:
327
- logger.info("Skipping KB lookup.")
328
363
 
329
364
  for item in scancode_result:
330
365
  item.set_oss_item(path_to_scan, run_kb)
331
366
 
367
+ # Add OSSItem for files in path_to_scan that are not in scancode_result
368
+ # when KB returns an origin URL for their MD5 hash (skip excluded_files)
369
+ if run_kb:
370
+ abs_path_to_scan = os.path.abspath(path_to_scan)
371
+ scancode_paths = {item.source_name_or_path for item in scancode_result}
372
+ for root, _dirs, files in os.walk(path_to_scan):
373
+ for file in files:
374
+ file_path = os.path.join(root, file)
375
+ rel_path = os.path.relpath(file_path, abs_path_to_scan).replace("\\", "/")
376
+ if rel_path in scancode_paths or rel_path in excluded_files:
377
+ continue
378
+ extra_item = SourceItem(rel_path)
379
+ extra_item.set_oss_item(path_to_scan, run_kb)
380
+ if extra_item.download_location:
381
+ scancode_result.append(extra_item)
382
+ scancode_paths.add(rel_path)
383
+
332
384
  return scancode_result
333
385
 
334
386
 
@@ -338,7 +390,7 @@ def run_scanners(
338
390
  called_by_cli: bool = True, print_matched_text: bool = False,
339
391
  formats: list = [], time_out: int = 120,
340
392
  correct_mode: bool = True, correct_filepath: str = "",
341
- selected_scanner: str = 'all', path_to_exclude: list = [],
393
+ selected_scanner: str = ALL_MODE, path_to_exclude: list = [],
342
394
  all_exclude_mode: tuple = ()
343
395
  ) -> Tuple[bool, str, 'ScannerItem', list, list]:
344
396
  """
@@ -397,8 +449,8 @@ def run_scanners(
397
449
  logger.debug(f"Skipped paths: {excluded_path_with_default_exclusion}")
398
450
 
399
451
  if not selected_scanner:
400
- selected_scanner = 'all'
401
- if selected_scanner in ['scancode', 'all', 'kb']:
452
+ selected_scanner = ALL_MODE
453
+ if selected_scanner in ['scancode', ALL_MODE]:
402
454
  success, result_log[RESULT_KEY], scancode_result, license_list = run_scan(path_to_scan, output_file_name,
403
455
  write_json_file, num_cores, True,
404
456
  print_matched_text, formats, called_by_cli,
@@ -406,20 +458,28 @@ def run_scanners(
406
458
  excluded_path_with_default_exclusion,
407
459
  excluded_files)
408
460
  excluded_files = set(excluded_files) if excluded_files else set()
409
- if selected_scanner in ['scanoss', 'all']:
461
+ if selected_scanner in ['scanoss', ALL_MODE]:
410
462
  scanoss_result, api_limit_exceed = run_scanoss_py(path_to_scan, output_path, formats, True, num_cores,
411
463
  excluded_path_with_default_exclusion, excluded_files,
412
464
  write_json_file)
413
465
 
466
+ run_kb_msg = ""
414
467
  if selected_scanner in SCANNER_TYPE:
415
- run_kb = True if selected_scanner in ['kb'] else False
468
+ run_kb = True if selected_scanner in ['kb', ALL_MODE] else False
469
+ if run_kb:
470
+ if not check_kb_server_reachable():
471
+ run_kb = False
472
+ run_kb_msg = "KB Unreachable"
473
+ else:
474
+ run_kb_msg = "KB Enabled"
475
+
416
476
  spdx_downloads, manifest_licenses = metadata_collector(path_to_scan, excluded_files)
417
477
  merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads,
418
- path_to_scan, run_kb, manifest_licenses)
478
+ path_to_scan, run_kb, manifest_licenses, excluded_files)
419
479
  scan_item = create_report_file(start_time, merged_result, license_list, scanoss_result, selected_scanner,
420
480
  print_matched_text, output_path, output_files, output_extensions, correct_mode,
421
481
  correct_filepath, path_to_scan, excluded_path_without_dot, formats,
422
- api_limit_exceed, cnt_file_except_skipped, final_output_path)
482
+ api_limit_exceed, cnt_file_except_skipped, final_output_path, run_kb_msg)
423
483
  else:
424
484
  print_help_msg_source_scanner()
425
485
  result_log[RESULT_KEY] = "Unsupported scanner"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fosslight_source
3
- Version: 2.2.10
3
+ Version: 2.2.11
4
4
  Summary: FOSSLight Source Scanner
5
5
  Home-page: https://github.com/fosslight/fosslight_source_scanner
6
6
  Download-URL: https://github.com/fosslight/fosslight_source_scanner