fosslight-source 2.2.1__py3-none-any.whl → 2.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,12 +7,10 @@ import os
7
7
  import logging
8
8
  import re
9
9
  import fosslight_util.constant as constant
10
- from fosslight_util.get_pom_license import get_license_from_pom
11
10
  from ._license_matched import MatchedLicense
12
11
  from ._scan_item import SourceItem
13
12
  from ._scan_item import replace_word
14
13
  from ._scan_item import is_notice_file
15
- from ._scan_item import is_manifest_file
16
14
  from typing import Tuple
17
15
 
18
16
  logger = logging.getLogger(constant.LOGGER_NAME)
@@ -181,35 +179,6 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
181
179
  if len(license_detected) > 0:
182
180
  result_item.licenses = license_detected
183
181
 
184
- detected_without_pom = []
185
- if is_manifest_file(file_path) and len(license_detected) > 0:
186
- result_item.is_manifest_file = True
187
- if file_path.endswith('.pom'):
188
- try:
189
- pom_licenses = get_license_from_pom(pom_path=file_path, check_parent=False)
190
- normalize_pom_licenses = []
191
- if pom_licenses:
192
- pom_license_list = pom_licenses.split(', ')
193
- for pom_license in pom_license_list:
194
- if pom_license not in license_detected:
195
- for lic_matched_key, lic_info in license_list.items():
196
- if hasattr(lic_info, 'matched_text') and lic_info.matched_text:
197
- matched_txt = str(lic_info.matched_text).replace(',', '')
198
- if pom_license in matched_txt:
199
- normalize_pom_licenses.append(lic_info.license)
200
- break
201
- else:
202
- normalize_pom_licenses.append(pom_license)
203
- detected_without_pom = list(set(license_detected) - set(normalize_pom_licenses))
204
- if detected_without_pom:
205
- result_item.comment = f"Detected: {', '.join(detected_without_pom)}"
206
- result_item.licenses = []
207
- result_item.licenses = normalize_pom_licenses
208
- if not normalize_pom_licenses:
209
- result_item.exclude = True
210
- except Exception as ex:
211
- logger.info(f"Failed to extract license from POM {file_path}: {ex}")
212
-
213
182
  # Remove copyright info for license text file of GPL family
214
183
  if should_remove_copyright_for_gpl_license_text(license_detected, result_item.is_license_text):
215
184
  logger.debug(f"Removing copyright for GPL family license text file: {file_path}")
@@ -217,7 +186,7 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
217
186
  else:
218
187
  result_item.copyright = copyright_value_list
219
188
 
220
- if len(license_expression_list) > 0 and not detected_without_pom:
189
+ if len(license_expression_list) > 0:
221
190
  license_expression_list = list(
222
191
  set(license_expression_list))
223
192
  result_item.comment = ','.join(license_expression_list)
@@ -314,35 +283,6 @@ def parsing_scancode_32_later(
314
283
  file.get("percentage_of_license_text", 0) > 90 and not is_source_file
315
284
  )
316
285
 
317
- detected_without_pom = []
318
- if is_manifest_file(file_path) and len(license_detected) > 0:
319
- result_item.is_manifest_file = True
320
- if file_path.endswith('.pom'):
321
- try:
322
- pom_licenses = get_license_from_pom(pom_path=file_path, check_parent=False)
323
- normalize_pom_licenses = []
324
- if pom_licenses:
325
- pom_license_list = pom_licenses.split(', ')
326
- for pom_license in pom_license_list:
327
- if pom_license not in license_detected:
328
- for lic_matched_key, lic_info in license_list.items():
329
- if hasattr(lic_info, 'matched_text') and lic_info.matched_text:
330
- matched_txt = str(lic_info.matched_text).replace(',', '')
331
- if pom_license in matched_txt:
332
- normalize_pom_licenses.append(lic_info.license)
333
- break
334
- else:
335
- normalize_pom_licenses.append(pom_license)
336
- detected_without_pom = list(set(license_detected) - set(normalize_pom_licenses))
337
- if detected_without_pom:
338
- result_item.comment = f"Detected: {', '.join(detected_without_pom)}"
339
- result_item.licenses = []
340
- result_item.licenses = normalize_pom_licenses
341
- if not normalize_pom_licenses:
342
- result_item.exclude = True
343
- except Exception as ex:
344
- logger.info(f"Failed to extract license from POM {file_path}: {ex}")
345
-
346
286
  # Remove copyright info for license text file of GPL family
347
287
  if should_remove_copyright_for_gpl_license_text(license_detected, result_item.is_license_text):
348
288
  logger.debug(f"Removing copyright for GPL family license text file: {file_path}")
@@ -350,7 +290,7 @@ def parsing_scancode_32_later(
350
290
  else:
351
291
  result_item.copyright = copyright_value_list
352
292
 
353
- if len(license_detected) > 1 and not detected_without_pom:
293
+ if len(license_detected) > 1:
354
294
  license_expression_spdx = file.get("detected_license_expression_spdx", "")
355
295
  license_expression = file.get("detected_license_expression", "")
356
296
  if license_expression_spdx:
@@ -18,7 +18,7 @@ replace_word = ["-only", "-old-style", "-or-later", "licenseref-scancode-", "lic
18
18
  _notice_filename = ['licen[cs]e[s]?', 'notice[s]?', 'legal', 'copyright[s]?', 'copying*', 'patent[s]?', 'unlicen[cs]e', 'eula',
19
19
  '[a,l]?gpl[-]?[1-3]?[.,-,_]?[0-1]?', 'mit', 'bsd[-]?[0-4]?', 'bsd[-]?[0-4][-]?clause[s]?',
20
20
  'apache[-,_]?[1-2]?[.,-,_]?[0-2]?']
21
- _manifest_filename = [r'.*\.pom$', r'package\.json$', r'setup\.py$', r'pubspec\.yaml$', r'.*\.podspec$', r'Cargo\.toml$']
21
+ _manifest_filename = [r'.*\.pom$', r'package\.json$', r'setup\.py$', r'setup\.cfg$', r'.*\.podspec$', r'Cargo\.toml$']
22
22
  MAX_LICENSE_LENGTH = 200
23
23
  MAX_LICENSE_TOTAL_LENGTH = 600
24
24
  SUBSTRING_LICENSE_COMMENT = "Maximum character limit (License)"
fosslight_source/cli.py CHANGED
@@ -25,9 +25,13 @@ from .run_scanoss import get_scanoss_extra_info
25
25
  import yaml
26
26
  import argparse
27
27
  from .run_spdx_extractor import get_spdx_downloads
28
+ from .run_manifest_extractor import get_manifest_licenses
28
29
  from ._scan_item import SourceItem, KB_URL
29
30
  from fosslight_util.oss_item import ScannerItem
30
31
  from typing import Tuple
32
+ from ._scan_item import is_manifest_file
33
+ import shutil
34
+
31
35
 
32
36
  SRC_SHEET_NAME = 'SRC_FL_Source'
33
37
  SCANOSS_HEADER = {SRC_SHEET_NAME: ['ID', 'Source Path', 'OSS Name',
@@ -37,9 +41,7 @@ MERGED_HEADER = {SRC_SHEET_NAME: ['ID', 'Source Path', 'OSS Name',
37
41
  'OSS Version', 'License', 'Download Location',
38
42
  'Homepage', 'Copyright Text', 'Exclude', 'Comment', 'license_reference']}
39
43
  SCANNER_TYPE = ['kb', 'scancode', 'scanoss', 'all']
40
- EXCLUDE_FILENAME = ["changelog", "config.guess", "config.sub", "changes", "ltmain.sh",
41
- "configure", "configure.ac", "depcomp", "compile", "missing", "Makefile"]
42
- EXCLUDE_FILE_EXTENSION = [".m4", ".in", ".po"]
44
+
43
45
 
44
46
  logger = logging.getLogger(constant.LOGGER_NAME)
45
47
  warnings.filterwarnings("ignore", category=FutureWarning)
@@ -135,7 +137,7 @@ def create_report_file(
135
137
  output_path: str = "", output_files: list = [],
136
138
  output_extensions: list = [], correct_mode: bool = True,
137
139
  correct_filepath: str = "", path_to_scan: str = "", path_to_exclude: list = [],
138
- formats: list = [], api_limit_exceed: bool = False, files_count: int = 0
140
+ formats: list = [], api_limit_exceed: bool = False, files_count: int = 0, final_output_path: str = ""
139
141
  ) -> 'ScannerItem':
140
142
  """
141
143
  Create report files for given scanned result.
@@ -149,10 +151,7 @@ def create_report_file(
149
151
  sheet_list = {}
150
152
  _json_ext = ".json"
151
153
 
152
- if output_path == "":
153
- output_path = os.getcwd()
154
- else:
155
- output_path = os.path.abspath(output_path)
154
+ output_path = os.path.abspath(output_path)
156
155
 
157
156
  if not output_files:
158
157
  # If -o does not contains file name, set default name
@@ -196,15 +195,27 @@ def create_report_file(
196
195
  scan_item.set_cover_pathinfo(path_to_scan, path_to_exclude)
197
196
  scan_item.set_cover_comment(f"Scanned files: {files_count}")
198
197
 
199
- if api_limit_exceed:
200
- scan_item.set_cover_comment("(Some of) SCANOSS scan was skipped. (API limits being exceeded)")
201
-
202
- if not merged_result:
198
+ if merged_result:
199
+ scan_item.set_cover_comment(f"Detected source : {len(merged_result)}")
200
+ else:
203
201
  if files_count < 1:
204
202
  scan_item.set_cover_comment("(No file detected.)")
205
203
  else:
206
204
  scan_item.set_cover_comment("(No OSS detected.)")
207
205
 
206
+ if api_limit_exceed:
207
+ scan_item.set_cover_comment("SCANOSS skipped (API limits)")
208
+
209
+ run_kb = True if selected_scanner in ['kb', 'all'] else False
210
+ if run_kb:
211
+ scan_item.set_cover_comment("KB Enabled" if check_kb_server_reachable() else "KB Unreachable")
212
+ display_mode = selected_scanner
213
+ if selected_scanner == "kb":
214
+ display_mode += ", scancode"
215
+ elif selected_scanner == "all":
216
+ display_mode = "kb, scancode, scanoss"
217
+ scan_item.set_cover_comment(f"Mode : {display_mode}")
218
+
208
219
  if merged_result:
209
220
  sheet_list = {}
210
221
  scan_item.append_file_items(merged_result, PKG_NAME)
@@ -240,12 +251,13 @@ def create_report_file(
240
251
  # del sheet_list["scanoss_reference"]
241
252
  results.append(write_output_file(combined_path_and_file, output_extension, scan_item, extended_header, "", output_format))
242
253
  for success, msg, result_file in results:
254
+ final_result_file = result_file.replace(output_path, final_output_path)
243
255
  if success:
244
- logger.info(f"Output file: {result_file}")
256
+ logger.info(f"Output file: {final_result_file}")
245
257
  for row in scan_item.get_cover_comment():
246
258
  logger.info(row)
247
259
  else:
248
- logger.error(f"Fail to generate result file {result_file}. msg:({msg})")
260
+ logger.error(f"Fail to generate result file {final_result_file}. msg:({msg})")
249
261
  return scan_item
250
262
 
251
263
 
@@ -265,7 +277,7 @@ def check_kb_server_reachable() -> bool:
265
277
 
266
278
  def merge_results(
267
279
  scancode_result: list = [], scanoss_result: list = [], spdx_downloads: dict = {},
268
- path_to_scan: str = "", run_kb: bool = False
280
+ path_to_scan: str = "", run_kb: bool = False, manifest_licenses: dict = {}
269
281
  ) -> list:
270
282
 
271
283
  """
@@ -291,6 +303,19 @@ def merge_results(
291
303
  new_result_item = SourceItem(file_name)
292
304
  new_result_item.download_location = download_location
293
305
  scancode_result.append(new_result_item)
306
+ if manifest_licenses:
307
+ for file_name, licenses in manifest_licenses.items():
308
+ if file_name in scancode_result:
309
+ merged_result_item = scancode_result[scancode_result.index(file_name)]
310
+ # overwrite existing detected licenses with manifest-provided licenses
311
+ merged_result_item.licenses = [] # clear existing licenses (setter clears when value falsy)
312
+ merged_result_item.licenses = licenses
313
+ merged_result_item.is_manifest_file = True
314
+ else:
315
+ new_result_item = SourceItem(file_name)
316
+ new_result_item.licenses = licenses
317
+ new_result_item.is_manifest_file = True
318
+ scancode_result.append(new_result_item)
294
319
  if run_kb and not check_kb_server_reachable():
295
320
  run_kb = False
296
321
  if run_kb:
@@ -310,7 +335,8 @@ def run_scanners(
310
335
  called_by_cli: bool = True, print_matched_text: bool = False,
311
336
  formats: list = [], time_out: int = 120,
312
337
  correct_mode: bool = True, correct_filepath: str = "",
313
- selected_scanner: str = 'all', path_to_exclude: list = []
338
+ selected_scanner: str = 'all', path_to_exclude: list = [],
339
+ all_exclude_mode: tuple = ()
314
340
  ) -> Tuple[bool, str, 'ScannerItem', list, list]:
315
341
  """
316
342
  Run Scancode and scanoss.py for the given path.
@@ -341,6 +367,11 @@ def run_scanners(
341
367
 
342
368
  success, msg, output_path, output_files, output_extensions, formats = check_output_formats_v2(output_file_name, formats)
343
369
 
370
+ if output_path == "":
371
+ output_path = os.getcwd()
372
+ final_output_path = output_path
373
+ output_path = os.path.join(os.path.dirname(output_path), '.fosslight_temp')
374
+
344
375
  logger, result_log = init_log(os.path.join(output_path, f"fosslight_log_src_{start_time}.txt"),
345
376
  True, logging.INFO, logging.DEBUG, PKG_NAME, path_to_scan, path_to_exclude)
346
377
 
@@ -349,10 +380,18 @@ def run_scanners(
349
380
  print_matched_text = False
350
381
 
351
382
  if success:
352
- path_to_exclude_with_filename = path_to_exclude + EXCLUDE_FILENAME
353
- excluded_path_with_default_exclusion, excluded_path_without_dot, excluded_files, cnt_file_except_skipped = (
354
- get_excluded_paths(path_to_scan, path_to_exclude_with_filename, EXCLUDE_FILE_EXTENSION))
355
- logger.debug(f"Skipped paths: {excluded_path_with_default_exclusion}")
383
+ if all_exclude_mode and len(all_exclude_mode) == 4:
384
+ (excluded_path_with_default_exclusion,
385
+ excluded_path_without_dot,
386
+ excluded_files,
387
+ cnt_file_except_skipped) = all_exclude_mode
388
+ else:
389
+ path_to_exclude_with_filename = path_to_exclude
390
+ (excluded_path_with_default_exclusion,
391
+ excluded_path_without_dot,
392
+ excluded_files,
393
+ cnt_file_except_skipped) = get_excluded_paths(path_to_scan, path_to_exclude_with_filename)
394
+ logger.debug(f"Skipped paths: {excluded_path_with_default_exclusion}")
356
395
 
357
396
  if not selected_scanner:
358
397
  selected_scanner = 'all'
@@ -365,16 +404,17 @@ def run_scanners(
365
404
  excluded_files)
366
405
  excluded_files = set(excluded_files) if excluded_files else set()
367
406
  if selected_scanner in ['scanoss', 'all']:
368
- scanoss_result, api_limit_exceed = run_scanoss_py(path_to_scan, output_file_name, formats, True, write_json_file,
407
+ scanoss_result, api_limit_exceed = run_scanoss_py(path_to_scan, output_file_name, formats, True,
369
408
  num_cores, excluded_path_with_default_exclusion, excluded_files)
370
409
  if selected_scanner in SCANNER_TYPE:
371
410
  run_kb = True if selected_scanner in ['kb', 'all'] else False
372
- spdx_downloads = get_spdx_downloads(path_to_scan, excluded_files)
373
- merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads, path_to_scan, run_kb)
411
+ spdx_downloads, manifest_licenses = metadata_collector(path_to_scan, excluded_files)
412
+ merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads,
413
+ path_to_scan, run_kb, manifest_licenses)
374
414
  scan_item = create_report_file(start_time, merged_result, license_list, scanoss_result, selected_scanner,
375
415
  print_matched_text, output_path, output_files, output_extensions, correct_mode,
376
416
  correct_filepath, path_to_scan, excluded_path_without_dot, formats,
377
- api_limit_exceed, cnt_file_except_skipped)
417
+ api_limit_exceed, cnt_file_except_skipped, final_output_path)
378
418
  else:
379
419
  print_help_msg_source_scanner()
380
420
  result_log[RESULT_KEY] = "Unsupported scanner"
@@ -382,8 +422,48 @@ def run_scanners(
382
422
  else:
383
423
  result_log[RESULT_KEY] = f"Format error. {msg}"
384
424
  success = False
425
+
426
+ try:
427
+ shutil.copytree(output_path, final_output_path, dirs_exist_ok=True)
428
+ shutil.rmtree(output_path)
429
+ except Exception as ex:
430
+ logger.debug(f"Failed to move temp files: {ex}")
431
+
385
432
  return success, result_log.get(RESULT_KEY, ""), scan_item, license_list, scanoss_result
386
433
 
387
434
 
435
def metadata_collector(path_to_scan: str, excluded_files: set) -> Tuple[dict, dict]:
    """
    Collect per-file metadata that is later merged into the scan result.

    Walks every file below ``path_to_scan`` once, skipping files whose
    relative path is listed in ``excluded_files``, and gathers:

    - spdx_downloads: {rel_path: [download_urls]} from get_spdx_downloads()
    - manifest_licenses: {rel_path: [license_names]} from get_manifest_licenses(),
      only for files accepted by is_manifest_file()

    :param path_to_scan: root directory to traverse.
    :param excluded_files: relative paths ('/'-separated) to skip; None is treated as empty.
    :return: (spdx_downloads, manifest_licenses)
    """
    abs_path_to_scan = os.path.abspath(path_to_scan)
    # Tolerate a missing exclusion set instead of failing on the membership test below.
    excluded = excluded_files if excluded_files else ()
    spdx_downloads = {}
    manifest_licenses = {}

    for root, _dirs, files in os.walk(path_to_scan):
        for file in files:
            file_path = os.path.join(root, file)
            # Keys are relative to the scan root and always use forward slashes.
            rel_path_file = os.path.relpath(file_path, abs_path_to_scan).replace('\\', '/')
            if rel_path_file in excluded:
                continue

            downloads = get_spdx_downloads(file_path)
            if downloads:
                spdx_downloads[rel_path_file] = downloads

            if is_manifest_file(file_path):
                licenses = get_manifest_licenses(file_path)
                if licenses:
                    manifest_licenses[rel_path_file] = licenses

    return spdx_downloads, manifest_licenses
466
+
467
+
388
468
  if __name__ == '__main__':
389
469
  main()
@@ -0,0 +1,249 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ # Copyright (c) 2025 LG Electronics Inc.
4
+ # SPDX-License-Identifier: Apache-2.0
5
+ import os
6
+ import json
7
+ import re
8
+ import logging
9
+ from fosslight_util.get_pom_license import get_license_from_pom
10
+ import fosslight_util.constant as constant
11
+
12
+ logger = logging.getLogger(constant.LOGGER_NAME)
13
+
14
+
15
def _trim_grouping_parens(token: str) -> str:
    """Remove grouping parentheses left on a token after an expression was split."""
    token = token.strip()
    # Unmatched opening parentheses at the front, e.g. "(MIT" from "(MIT OR ISC)".
    while token.startswith('(') and token.count('(') > token.count(')'):
        token = token[1:].strip()
    # Unmatched closing parentheses at the end, e.g. "ISC)".
    while token.endswith(')') and token.count(')') > token.count('('):
        token = token[:-1].strip()
    # A pair that wraps the token, e.g. "(MIT)".
    while len(token) >= 2 and token[0] == '(' and token[-1] == ')':
        token = token[1:-1].strip()
    return token


def _split_spdx_expression(value: str) -> list[str]:
    """
    Split a license expression into its individual license names.

    The value is cut at ``OR`` / ``AND`` (case-insensitive, surrounded by
    whitespace) and at ``||`` / ``&&``. An "or"/"and" that is followed by
    "later", "newer", "greater", "higher" or "above" is kept, so names such
    as "GPL-2.0 or later" stay in one piece. Grouping parentheses are removed
    while parentheses that belong to a name ("BSD (3-clause)") are preserved.

    :param value: license expression text.
    :return: unique license names in order of first appearance.
    """
    if not value:
        return []
    parts = re.split(
        r'\s+(?:OR|AND)\s+(?!(?:later|newer|greater|higher|above)\b)|[|]{2}|&&',
        value, flags=re.IGNORECASE)
    tokens = [_trim_grouping_parens(part) for part in parts]
    # dict.fromkeys de-duplicates while keeping the original order.
    return list(dict.fromkeys(token for token in tokens if token))
27
+
28
+
29
def get_licenses_from_package_json(file_path: str) -> list[str]:
    """
    Read the license names declared in a package.json file.

    Looks at the ``license`` field first: a string is split as a license
    expression (a value starting with "SEE LICENSE IN" yields no license),
    an object contributes its ``type``. Only when that gives nothing, the
    legacy ``licenses`` array (strings or objects with ``type``) is used.

    :param file_path: path of the package.json file.
    :return: unique license names in order of appearance; [] when the file
             cannot be read or parsed, or declares no license.
    """
    def _entry_name(entry) -> str:
        """Return the stripped name of a string or {'type': ...} entry, or ''."""
        if isinstance(entry, dict):
            entry = entry.get('type')
        return entry.strip() if isinstance(entry, str) else ''

    try:
        # 'utf-8-sig' reads plain UTF-8 as well and drops a leading BOM, which json rejects.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            data = json.load(f)
    except Exception as ex:
        logger.info(f"Failed to read package.json {file_path}: {ex}")
        return []

    if not isinstance(data, dict):
        return []

    licenses: list[str] = []
    license_field = data.get('license')

    if isinstance(license_field, str):
        value = license_field.strip()
        if value.upper().startswith('SEE LICENSE IN'):
            # The license is described in another file, not named here.
            return []
        licenses.extend(_split_spdx_expression(value))
    elif isinstance(license_field, dict):
        name = _entry_name(license_field)
        if name:
            licenses.append(name)

    if not licenses:
        legacy = data.get('licenses')
        if isinstance(legacy, list):
            for item in legacy:
                name = _entry_name(item)
                if name:
                    licenses.append(name)

    # De-duplicate while keeping the original order.
    return list(dict.fromkeys(licenses))
75
+
76
+
77
def get_licenses_from_setup_cfg(file_path: str) -> list[str]:
    """
    Read the license names from the ``license`` key of ``[metadata]`` in setup.cfg.

    The file is parsed with configparser first. If that yields no license
    (or fails), the text is scanned with regular expressions as a fallback.

    :param file_path: path of the setup.cfg file.
    :return: license names from the expression, or [] when none is declared
             or the file cannot be read.
    """
    try:
        import configparser
        # Interpolation is disabled so a literal '%' in a value is not treated as a reference.
        parser = configparser.ConfigParser(interpolation=None)
        parser.read(file_path, encoding='utf-8')
        if parser.has_section('metadata'):
            license_value = parser.get('metadata', 'license', fallback='').strip()
            if license_value:
                return _split_spdx_expression(license_value)
    except Exception as ex:
        logger.info(f"Failed to parse setup.cfg with configparser for {file_path}: {ex}")

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        # Body of the [metadata] section: everything up to the next section header or end of file.
        meta_match = re.search(r'^\s*\[metadata\]\s*(.*?)(?=^\s*\[|\Z)', content, flags=re.MULTILINE | re.DOTALL)
        if not meta_match:
            return []
        block = meta_match.group(1)
        # Only spaces/tabs are allowed around '=' so an empty "license =" cannot swallow the next line.
        m = re.search(r'^[ \t]*license[ \t]*=[ \t]*(.+)$', block, flags=re.MULTILINE)
        if not m:
            return []
        val = m.group(1).strip()
        # Drop one pair of matching surrounding quotes.
        if (len(val) >= 2) and ((val[0] == val[-1]) and val[0] in ('"', "'")):
            val = val[1:-1].strip()
        if not val:
            return []
        return _split_spdx_expression(val)
    except Exception as ex:
        logger.info(f"Failed to parse setup.cfg {file_path} via regex fallback: {ex}")
        return []
108
+
109
+
110
def get_licenses_from_setup_py(file_path: str) -> list[str]:
    """
    Read the license names from the first ``license = <string literal>`` in setup.py.

    The file is not executed; its text is searched (case-insensitively) for
    ``license`` followed by ``=`` and a quoted string.

    :param file_path: path of the setup.py file.
    :return: license names from the expression, or [] when the file cannot
             be read, has no such assignment, or the string is empty.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except Exception as ex:
        logger.info(f"Failed to read setup.py {file_path}: {ex}")
        return []

    # The opening quote is one exact delimiter (triple quotes tried first) and the value
    # may be empty, so license="" is read as empty instead of matching into later arguments.
    match = re.search(r'license\s*=\s*(?P<q>"""|\'\'\'|"|\')(?P<val>.*?)(?P=q)', content,
                      flags=re.IGNORECASE | re.DOTALL)
    if not match:
        return []
    value = match.group('val').strip()
    if not value:
        return []

    return _split_spdx_expression(value)
126
+
127
+
128
def get_licenses_from_podspec(file_path: str) -> list[str]:
    """
    Read the license names from the ``license`` attribute of a podspec file.

    The text is searched (case-insensitively) for these forms, in this order;
    the first one that yields a non-empty value wins:

    1. ``license = 'MIT'``                 (quoted string)
    2. ``license = { :type => 'MIT' }``    or ``{ type: 'MIT' }``
    3. ``license = { :type => :MIT }``     or ``{ type: :MIT }``
    4. ``license = :MIT``                  (symbol)

    :param file_path: path of the .podspec file.
    :return: license names from the expression, or [] when the file cannot
             be read or no form matches.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except Exception as ex:
        logger.info(f"Failed to read podspec {file_path}: {ex}")
        return []

    # Hash key written either as ":type =>" or in the "type:" shorthand.
    type_key = r'(?::type\s*=>|\btype:)\s*'
    patterns = (
        (r'''\blicense\s*=\s*(?P<q>['"])(?P<val>.+?)(?P=q)''', re.IGNORECASE),
        (r'\blicense\s*=\s*\{[^}]*?' + type_key + r'''(?P<q>['"])(?P<val>.+?)(?P=q)''',
         re.IGNORECASE | re.DOTALL),
        (r'\blicense\s*=\s*\{[^}]*?' + type_key + r':(?P<val>\w+)', re.IGNORECASE | re.DOTALL),
        (r'\blicense\s*=\s*:(?P<val>\w+)', re.DOTALL | re.IGNORECASE),
    )
    for pattern, flags in patterns:
        m = re.search(pattern, content, flags=flags)
        if m:
            value = m.group('val').strip()
            if value:
                return _split_spdx_expression(value)

    return []
161
+
162
+
163
def get_licenses_from_cargo_toml(file_path: str) -> list[str]:
    """
    Read the license names from the ``license`` key of ``[package]`` in Cargo.toml.

    A TOML parser (tomllib, then tomli) is tried first; when it is not
    available, fails, or finds nothing, the text is scanned with regular
    expressions. A package that only declares ``license-file`` yields no
    license. The legacy ``/`` separator ("MIT/Apache-2.0") is treated as OR.

    :param file_path: path of the Cargo.toml file.
    :return: license names from the expression, or [] when none is declared
             or the file cannot be read.
    """
    def _split_cargo_license(raw: str) -> list[str]:
        """Split a Cargo license value, accepting the deprecated '/' separator."""
        return _split_spdx_expression(re.sub(r'\s*/\s*', ' OR ', raw.strip()))

    try:
        data = None
        try:
            import tomllib as toml_loader  # Python 3.11+
            with open(file_path, 'rb') as f:
                data = toml_loader.load(f)
        except Exception:
            try:
                import tomli as toml_loader  # Backport
                with open(file_path, 'rb') as f:
                    data = toml_loader.load(f)
            except Exception:
                data = None

        if isinstance(data, dict):
            package_tbl = data.get('package') or {}
            license_value = package_tbl.get('license')
            if isinstance(license_value, str) and license_value.strip():
                return _split_cargo_license(license_value)
            if package_tbl.get('license-file'):
                # The license lives in a separate file; no name to report.
                return []
    except Exception as ex:
        logger.info(f"Failed to parse Cargo.toml via toml parser for {file_path}: {ex}")

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        # Body of the [package] table: everything up to the next table header or end of file.
        pkg_match = re.search(r'^\s*\[package\]\s*(.*?)(?=^\s*\[|\Z)', content, flags=re.MULTILINE | re.DOTALL)
        if not pkg_match:
            return []
        block = pkg_match.group(1)
        m = re.search(r'^\s*license\s*=\s*(?P<q>"""|\'\'\'|"|\')(?P<val>.*?)(?P=q)', block, flags=re.MULTILINE | re.DOTALL)
        if m:
            val = m.group('val').strip()
            if val:
                return _split_cargo_license(val)
    except Exception as ex:
        logger.info(f"Failed to parse Cargo.toml {file_path}: {ex}")
    return []
208
+
209
+
210
def get_manifest_licenses(file_path: str) -> list[str]:
    """
    Extract the declared license names from a package manifest file.

    The extractor is chosen from the file name: ``*.pom`` (case-sensitive
    suffix), and, compared in lower case, ``package.json``, ``setup.cfg``,
    ``setup.py``, ``*.podspec`` and ``Cargo.toml``.

    :param file_path: path of the manifest file.
    :return: license names; [] when the file type is not recognised, no
             license is declared, or extraction fails (the failure is logged).
    """
    base_name = os.path.basename(file_path).lower()
    manifest_kind = ""  # used only in the failure log message
    try:
        if file_path.endswith('.pom'):
            manifest_kind = "POM"
            pom_licenses = get_license_from_pom(group_id='', artifact_id='', version='',
                                                pom_path=file_path, check_parent=True)
            if not pom_licenses:
                return []
            # The helper's result is split on ", " into individual names.
            return [x.strip() for x in pom_licenses.split(', ') if x.strip()]
        if base_name == 'package.json':
            manifest_kind = "package.json"
            return get_licenses_from_package_json(file_path)
        if base_name == 'setup.cfg':
            manifest_kind = "setup.cfg"
            return get_licenses_from_setup_cfg(file_path)
        if base_name == 'setup.py':
            manifest_kind = "setup.py"
            return get_licenses_from_setup_py(file_path)
        if base_name.endswith('.podspec'):
            manifest_kind = "podspec"
            return get_licenses_from_podspec(file_path)
        if base_name == 'cargo.toml':
            manifest_kind = "Cargo.toml"
            return get_licenses_from_cargo_toml(file_path)
    except Exception as ex:
        logger.info(f"Failed to extract license from {manifest_kind} {file_path}: {ex}")
    # Unrecognised file type or failed extraction: always hand back a list.
    return []
@@ -8,14 +8,10 @@ import importlib_metadata
8
8
  import warnings
9
9
  import logging
10
10
  import json
11
- from datetime import datetime
12
11
  import fosslight_util.constant as constant
13
- from fosslight_util.set_log import init_log
14
12
  from fosslight_util.output_format import check_output_formats_v2 # , write_output_file
15
13
  from ._parsing_scanoss_file import parsing_scan_result # scanoss
16
14
  from ._parsing_scanoss_file import parsing_extra_info # scanoss
17
- import shutil
18
- from pathlib import Path
19
15
  from scanoss.scanner import Scanner, ScanType
20
16
  import io
21
17
  import contextlib
@@ -32,7 +28,7 @@ def get_scanoss_extra_info(scanned_result: dict) -> list:
32
28
 
33
29
 
34
30
  def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list = [],
35
- called_by_cli: bool = False, write_json_file: bool = False, num_threads: int = -1,
31
+ called_by_cli: bool = False, num_threads: int = -1,
36
32
  path_to_exclude: list = [], excluded_files: set = None) -> list:
37
33
  """
38
34
  Run scanoss.py for the given path.
@@ -46,13 +42,8 @@ def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list =
46
42
  """
47
43
  success, msg, output_path, output_files, output_extensions, formats = check_output_formats_v2(output_file_name, format)
48
44
 
49
- if not called_by_cli:
50
- global logger
51
- _start_time = datetime.now().strftime('%y%m%d_%H%M')
52
- logger, _result_log = init_log(os.path.join(output_path, f"fosslight_log_src_{_start_time}.txt"),
53
- True, logging.INFO, logging.DEBUG, _PKG_NAME, path_to_scan, path_to_exclude)
54
-
55
45
  scanoss_file_list = []
46
+ api_limit_exceed = False
56
47
  try:
57
48
  importlib_metadata.distribution("scanoss")
58
49
  except Exception as error:
@@ -60,12 +51,6 @@ def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list =
60
51
  logger.warning("Please install scanoss and dataclasses before run fosslight_source with scanoss option.")
61
52
  return scanoss_file_list
62
53
 
63
- if output_path == "": # if json output with _write_json_file not used, output_path won't be needed.
64
- output_path = os.getcwd()
65
- else:
66
- output_path = os.path.abspath(output_path)
67
- if not os.path.isdir(output_path):
68
- Path(output_path).mkdir(parents=True, exist_ok=True)
69
54
  output_json_file = os.path.join(output_path, SCANOSS_OUTPUT_FILE)
70
55
  if os.path.exists(output_json_file): # remove scanner_output.wfp file if exist
71
56
  os.remove(output_json_file)
@@ -84,7 +69,6 @@ def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list =
84
69
  scanner.scan_folder_with_options(scan_dir=path_to_scan)
85
70
  captured_output = output_buffer.getvalue()
86
71
  api_limit_exceed = "due to service limits being exceeded" in captured_output
87
- logger.debug(f"{captured_output}")
88
72
 
89
73
  if os.path.isfile(output_json_file):
90
74
  with open(output_json_file, "r") as st_json:
@@ -103,13 +87,4 @@ def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list =
103
87
 
104
88
  logger.info(f"|---Number of files detected with SCANOSS: {(len(scanoss_file_list))}")
105
89
 
106
- try:
107
- if write_json_file:
108
- shutil.move(SCANOSS_RESULT_FILE, output_path)
109
- else:
110
- os.remove(output_json_file)
111
- os.remove(SCANOSS_RESULT_FILE)
112
- except Exception as error:
113
- logger.debug(f"Moving scanoss raw files failed.: {error}")
114
-
115
90
  return scanoss_file_list, api_limit_exceed
@@ -12,26 +12,15 @@ import mmap
12
12
  logger = logging.getLogger(constant.LOGGER_NAME)
13
13
 
14
14
 
15
- def get_spdx_downloads(path_to_scan: str, path_to_exclude: set = None) -> dict:
16
- download_dict = {}
15
def get_spdx_downloads(file_path: str) -> list[str]:
    """
    Collect the SPDX-PackageDownloadLocation values found in one file.

    The file content is scanned as bytes for
    ``SPDX-PackageDownloadLocation: <value>`` (tag matched case-insensitively);
    each value runs up to the next whitespace.

    :param file_path: path of the file to scan.
    :return: values in order of appearance; [] for an empty file, a file
             without the tag, or when reading fails (a warning is logged).
    """
    results = []
    find_word = re.compile(rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)", re.IGNORECASE)
    try:
        # An empty file cannot be memory-mapped, so it is skipped up front.
        if os.path.getsize(file_path) > 0:
            # Binary mode: the content is only ever searched as bytes through mmap.
            with open(file_path, "rb") as f:
                with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj:
                    # errors='replace' keeps one undecodable value from discarding the rest.
                    results = [word.decode('utf-8', errors='replace')
                               for word in find_word.findall(mmap_obj)]
    except Exception as ex:
        logger.warning(f"Failed to extract SPDX download location. {file_path}, {ex}")
    return results
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fosslight_source
3
- Version: 2.2.1
3
+ Version: 2.2.3
4
4
  Summary: FOSSLight Source Scanner
5
5
  Home-page: https://github.com/fosslight/fosslight_source_scanner
6
6
  Download-URL: https://github.com/fosslight/fosslight_source_scanner
@@ -17,7 +17,7 @@ License-File: LICENSE
17
17
  Requires-Dist: pyparsing
18
18
  Requires-Dist: scanoss>=1.18.0
19
19
  Requires-Dist: XlsxWriter
20
- Requires-Dist: fosslight_util>=2.1.34
20
+ Requires-Dist: fosslight_util>=2.1.37
21
21
  Requires-Dist: PyYAML
22
22
  Requires-Dist: wheel>=0.38.1
23
23
  Requires-Dist: intbitset
@@ -0,0 +1,17 @@
1
+ fosslight_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ fosslight_source/_help.py,sha256=Ge6g9GKWGza11E74PFnBMqsj40UCUz-a_xArDZ1FClU,2316
3
+ fosslight_source/_license_matched.py,sha256=-3H881XQjFDafRttBsuboS3VbCPYEvPH1pwWXptknE4,2164
4
+ fosslight_source/_parsing_scancode_file_item.py,sha256=9TjCwTTPaytqTlZzCKzaX-n20xJLy346xwp0Ee-rWWA,14951
5
+ fosslight_source/_parsing_scanoss_file.py,sha256=L3iHqmQF2jeSpHYuYSre44doXKy-BoX0u1Lm2IfJSU8,3866
6
+ fosslight_source/_scan_item.py,sha256=rWoC-jMc6Hf_dpiwVdQjATNhkzRgLVn966q3UA1TAxc,9412
7
+ fosslight_source/cli.py,sha256=GDwBlwgSvPtWlKhk6hZWQSUl71MJYaGIE4_Mcp1w_L8,20367
8
+ fosslight_source/run_manifest_extractor.py,sha256=8itu69eoriEnSskVDF9rnkLZ3d515KTaKy6tkZKWgBk,8940
9
+ fosslight_source/run_scancode.py,sha256=TFyNLV6P9rSBo9royDoG6az4l7Tkpl8Gr66IFK1DBU8,9021
10
+ fosslight_source/run_scanoss.py,sha256=91hQ60BcjgCkfrcWuMYoR4lRSCa1-AdLMxVubTx_Wv4,3763
11
+ fosslight_source/run_spdx_extractor.py,sha256=wIdjDIpzglc2tfrh_YodkAQ0QqfgfmJBSuE2bf4w4Tg,862
12
+ fosslight_source-2.2.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
13
+ fosslight_source-2.2.3.dist-info/METADATA,sha256=Md1A-4vaFc-sabOYYLntHZFo6NoohWA-Sn_H7K4LWus,3557
14
+ fosslight_source-2.2.3.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
15
+ fosslight_source-2.2.3.dist-info/entry_points.txt,sha256=G4bBRWqSrJ68g-2M-JtNDrSZsdym_M7_KohQ2qR1vG8,113
16
+ fosslight_source-2.2.3.dist-info/top_level.txt,sha256=C2vw-0OIent84Vq-UEk1gt_kK1EL8dIItzBzp3WNyA4,17
17
+ fosslight_source-2.2.3.dist-info/RECORD,,
@@ -1,16 +0,0 @@
1
- fosslight_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- fosslight_source/_help.py,sha256=Ge6g9GKWGza11E74PFnBMqsj40UCUz-a_xArDZ1FClU,2316
3
- fosslight_source/_license_matched.py,sha256=-3H881XQjFDafRttBsuboS3VbCPYEvPH1pwWXptknE4,2164
4
- fosslight_source/_parsing_scancode_file_item.py,sha256=-shPakF0oQWDzxWFylE2dQ93O4tgCudYM2zvX4K5glQ,19386
5
- fosslight_source/_parsing_scanoss_file.py,sha256=L3iHqmQF2jeSpHYuYSre44doXKy-BoX0u1Lm2IfJSU8,3866
6
- fosslight_source/_scan_item.py,sha256=NMmYaqdpNM-yeJxXPVPmoPo_thOnaAGRXYDEcpD6s2Y,9415
7
- fosslight_source/cli.py,sha256=qbp87Rhe5c2hIcF1-5TR6btPeOCe32Ffq1pxJM9ADcY,17303
8
- fosslight_source/run_scancode.py,sha256=TFyNLV6P9rSBo9royDoG6az4l7Tkpl8Gr66IFK1DBU8,9021
9
- fosslight_source/run_scanoss.py,sha256=_gdA4kOByI4saT4bDvMwIabpxtpH4f_yruHdBtb_g-o,4852
10
- fosslight_source/run_spdx_extractor.py,sha256=LLyYKpkpD5Qxkv_qTNBrFtKvrmnXw9SFYZCkpishb_c,1537
11
- fosslight_source-2.2.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
12
- fosslight_source-2.2.1.dist-info/METADATA,sha256=dXxCjJqwvuDy1Yz4KTltcIeO6etLzTx0AbwF_x0aGkU,3557
13
- fosslight_source-2.2.1.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
14
- fosslight_source-2.2.1.dist-info/entry_points.txt,sha256=G4bBRWqSrJ68g-2M-JtNDrSZsdym_M7_KohQ2qR1vG8,113
15
- fosslight_source-2.2.1.dist-info/top_level.txt,sha256=C2vw-0OIent84Vq-UEk1gt_kK1EL8dIItzBzp3WNyA4,17
16
- fosslight_source-2.2.1.dist-info/RECORD,,