fosslight-source 2.2.1__tar.gz → 2.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. {fosslight_source-2.2.1/src/fosslight_source.egg-info → fosslight_source-2.2.2}/PKG-INFO +1 -1
  2. {fosslight_source-2.2.1 → fosslight_source-2.2.2}/setup.py +1 -1
  3. {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source/_parsing_scancode_file_item.py +2 -62
  4. {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source/_scan_item.py +1 -1
  5. {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source/cli.py +53 -3
  6. fosslight_source-2.2.2/src/fosslight_source/run_manifest_extractor.py +251 -0
  7. fosslight_source-2.2.2/src/fosslight_source/run_spdx_extractor.py +26 -0
  8. {fosslight_source-2.2.1 → fosslight_source-2.2.2/src/fosslight_source.egg-info}/PKG-INFO +1 -1
  9. {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source.egg-info/SOURCES.txt +1 -0
  10. fosslight_source-2.2.1/src/fosslight_source/run_spdx_extractor.py +0 -37
  11. {fosslight_source-2.2.1 → fosslight_source-2.2.2}/LICENSE +0 -0
  12. {fosslight_source-2.2.1 → fosslight_source-2.2.2}/MANIFEST.in +0 -0
  13. {fosslight_source-2.2.1 → fosslight_source-2.2.2}/README.md +0 -0
  14. {fosslight_source-2.2.1 → fosslight_source-2.2.2}/requirements.txt +0 -0
  15. {fosslight_source-2.2.1 → fosslight_source-2.2.2}/setup.cfg +0 -0
  16. {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source/__init__.py +0 -0
  17. {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source/_help.py +0 -0
  18. {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source/_license_matched.py +0 -0
  19. {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source/_parsing_scanoss_file.py +0 -0
  20. {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source/run_scancode.py +0 -0
  21. {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source/run_scanoss.py +0 -0
  22. {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source.egg-info/dependency_links.txt +0 -0
  23. {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source.egg-info/entry_points.txt +0 -0
  24. {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source.egg-info/requires.txt +0 -0
  25. {fosslight_source-2.2.1 → fosslight_source-2.2.2}/src/fosslight_source.egg-info/top_level.txt +0 -0
  26. {fosslight_source-2.2.1 → fosslight_source-2.2.2}/tests/test_tox.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fosslight_source
3
- Version: 2.2.1
3
+ Version: 2.2.2
4
4
  Summary: FOSSLight Source Scanner
5
5
  Home-page: https://github.com/fosslight/fosslight_source_scanner
6
6
  Download-URL: https://github.com/fosslight/fosslight_source_scanner
@@ -14,7 +14,7 @@ with open('requirements.txt', 'r', 'utf-8') as f:
14
14
  if __name__ == "__main__":
15
15
  setup(
16
16
  name='fosslight_source',
17
- version='2.2.1',
17
+ version='2.2.2',
18
18
  package_dir={"": "src"},
19
19
  packages=find_packages(where='src'),
20
20
  description='FOSSLight Source Scanner',
@@ -7,12 +7,10 @@ import os
7
7
  import logging
8
8
  import re
9
9
  import fosslight_util.constant as constant
10
- from fosslight_util.get_pom_license import get_license_from_pom
11
10
  from ._license_matched import MatchedLicense
12
11
  from ._scan_item import SourceItem
13
12
  from ._scan_item import replace_word
14
13
  from ._scan_item import is_notice_file
15
- from ._scan_item import is_manifest_file
16
14
  from typing import Tuple
17
15
 
18
16
  logger = logging.getLogger(constant.LOGGER_NAME)
@@ -181,35 +179,6 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
181
179
  if len(license_detected) > 0:
182
180
  result_item.licenses = license_detected
183
181
 
184
- detected_without_pom = []
185
- if is_manifest_file(file_path) and len(license_detected) > 0:
186
- result_item.is_manifest_file = True
187
- if file_path.endswith('.pom'):
188
- try:
189
- pom_licenses = get_license_from_pom(pom_path=file_path, check_parent=False)
190
- normalize_pom_licenses = []
191
- if pom_licenses:
192
- pom_license_list = pom_licenses.split(', ')
193
- for pom_license in pom_license_list:
194
- if pom_license not in license_detected:
195
- for lic_matched_key, lic_info in license_list.items():
196
- if hasattr(lic_info, 'matched_text') and lic_info.matched_text:
197
- matched_txt = str(lic_info.matched_text).replace(',', '')
198
- if pom_license in matched_txt:
199
- normalize_pom_licenses.append(lic_info.license)
200
- break
201
- else:
202
- normalize_pom_licenses.append(pom_license)
203
- detected_without_pom = list(set(license_detected) - set(normalize_pom_licenses))
204
- if detected_without_pom:
205
- result_item.comment = f"Detected: {', '.join(detected_without_pom)}"
206
- result_item.licenses = []
207
- result_item.licenses = normalize_pom_licenses
208
- if not normalize_pom_licenses:
209
- result_item.exclude = True
210
- except Exception as ex:
211
- logger.info(f"Failed to extract license from POM {file_path}: {ex}")
212
-
213
182
  # Remove copyright info for license text file of GPL family
214
183
  if should_remove_copyright_for_gpl_license_text(license_detected, result_item.is_license_text):
215
184
  logger.debug(f"Removing copyright for GPL family license text file: {file_path}")
@@ -217,7 +186,7 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
217
186
  else:
218
187
  result_item.copyright = copyright_value_list
219
188
 
220
- if len(license_expression_list) > 0 and not detected_without_pom:
189
+ if len(license_expression_list) > 0:
221
190
  license_expression_list = list(
222
191
  set(license_expression_list))
223
192
  result_item.comment = ','.join(license_expression_list)
@@ -314,35 +283,6 @@ def parsing_scancode_32_later(
314
283
  file.get("percentage_of_license_text", 0) > 90 and not is_source_file
315
284
  )
316
285
 
317
- detected_without_pom = []
318
- if is_manifest_file(file_path) and len(license_detected) > 0:
319
- result_item.is_manifest_file = True
320
- if file_path.endswith('.pom'):
321
- try:
322
- pom_licenses = get_license_from_pom(pom_path=file_path, check_parent=False)
323
- normalize_pom_licenses = []
324
- if pom_licenses:
325
- pom_license_list = pom_licenses.split(', ')
326
- for pom_license in pom_license_list:
327
- if pom_license not in license_detected:
328
- for lic_matched_key, lic_info in license_list.items():
329
- if hasattr(lic_info, 'matched_text') and lic_info.matched_text:
330
- matched_txt = str(lic_info.matched_text).replace(',', '')
331
- if pom_license in matched_txt:
332
- normalize_pom_licenses.append(lic_info.license)
333
- break
334
- else:
335
- normalize_pom_licenses.append(pom_license)
336
- detected_without_pom = list(set(license_detected) - set(normalize_pom_licenses))
337
- if detected_without_pom:
338
- result_item.comment = f"Detected: {', '.join(detected_without_pom)}"
339
- result_item.licenses = []
340
- result_item.licenses = normalize_pom_licenses
341
- if not normalize_pom_licenses:
342
- result_item.exclude = True
343
- except Exception as ex:
344
- logger.info(f"Failed to extract license from POM {file_path}: {ex}")
345
-
346
286
  # Remove copyright info for license text file of GPL family
347
287
  if should_remove_copyright_for_gpl_license_text(license_detected, result_item.is_license_text):
348
288
  logger.debug(f"Removing copyright for GPL family license text file: {file_path}")
@@ -350,7 +290,7 @@ def parsing_scancode_32_later(
350
290
  else:
351
291
  result_item.copyright = copyright_value_list
352
292
 
353
- if len(license_detected) > 1 and not detected_without_pom:
293
+ if len(license_detected) > 1:
354
294
  license_expression_spdx = file.get("detected_license_expression_spdx", "")
355
295
  license_expression = file.get("detected_license_expression", "")
356
296
  if license_expression_spdx:
@@ -18,7 +18,7 @@ replace_word = ["-only", "-old-style", "-or-later", "licenseref-scancode-", "lic
18
18
  _notice_filename = ['licen[cs]e[s]?', 'notice[s]?', 'legal', 'copyright[s]?', 'copying*', 'patent[s]?', 'unlicen[cs]e', 'eula',
19
19
  '[a,l]?gpl[-]?[1-3]?[.,-,_]?[0-1]?', 'mit', 'bsd[-]?[0-4]?', 'bsd[-]?[0-4][-]?clause[s]?',
20
20
  'apache[-,_]?[1-2]?[.,-,_]?[0-2]?']
21
- _manifest_filename = [r'.*\.pom$', r'package\.json$', r'setup\.py$', r'pubspec\.yaml$', r'.*\.podspec$', r'Cargo\.toml$']
21
+ _manifest_filename = [r'.*\.pom$', r'package\.json$', r'setup\.py$', r'setup\.cfg$', r'.*\.podspec$', r'Cargo\.toml$']
22
22
  MAX_LICENSE_LENGTH = 200
23
23
  MAX_LICENSE_TOTAL_LENGTH = 600
24
24
  SUBSTRING_LICENSE_COMMENT = "Maximum character limit (License)"
@@ -25,9 +25,12 @@ from .run_scanoss import get_scanoss_extra_info
25
25
  import yaml
26
26
  import argparse
27
27
  from .run_spdx_extractor import get_spdx_downloads
28
+ from .run_manifest_extractor import get_manifest_licenses
28
29
  from ._scan_item import SourceItem, KB_URL
29
30
  from fosslight_util.oss_item import ScannerItem
30
31
  from typing import Tuple
32
+ from ._scan_item import is_manifest_file
33
+
31
34
 
32
35
  SRC_SHEET_NAME = 'SRC_FL_Source'
33
36
  SCANOSS_HEADER = {SRC_SHEET_NAME: ['ID', 'Source Path', 'OSS Name',
@@ -265,7 +268,7 @@ def check_kb_server_reachable() -> bool:
265
268
 
266
269
  def merge_results(
267
270
  scancode_result: list = [], scanoss_result: list = [], spdx_downloads: dict = {},
268
- path_to_scan: str = "", run_kb: bool = False
271
+ path_to_scan: str = "", run_kb: bool = False, manifest_licenses: dict = {}
269
272
  ) -> list:
270
273
 
271
274
  """
@@ -291,6 +294,19 @@ def merge_results(
291
294
  new_result_item = SourceItem(file_name)
292
295
  new_result_item.download_location = download_location
293
296
  scancode_result.append(new_result_item)
297
+ if manifest_licenses:
298
+ for file_name, licenses in manifest_licenses.items():
299
+ if file_name in scancode_result:
300
+ merged_result_item = scancode_result[scancode_result.index(file_name)]
301
+ # overwrite existing detected licenses with manifest-provided licenses
302
+ merged_result_item.licenses = [] # clear existing licenses (setter clears when value falsy)
303
+ merged_result_item.licenses = licenses
304
+ merged_result_item.is_manifest_file = True
305
+ else:
306
+ new_result_item = SourceItem(file_name)
307
+ new_result_item.licenses = licenses
308
+ new_result_item.is_manifest_file = True
309
+ scancode_result.append(new_result_item)
294
310
  if run_kb and not check_kb_server_reachable():
295
311
  run_kb = False
296
312
  if run_kb:
@@ -369,8 +385,9 @@ def run_scanners(
369
385
  num_cores, excluded_path_with_default_exclusion, excluded_files)
370
386
  if selected_scanner in SCANNER_TYPE:
371
387
  run_kb = True if selected_scanner in ['kb', 'all'] else False
372
- spdx_downloads = get_spdx_downloads(path_to_scan, excluded_files)
373
- merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads, path_to_scan, run_kb)
388
+ spdx_downloads, manifest_licenses = metadata_collector(path_to_scan, excluded_files)
389
+ merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads,
390
+ path_to_scan, run_kb, manifest_licenses)
374
391
  scan_item = create_report_file(start_time, merged_result, license_list, scanoss_result, selected_scanner,
375
392
  print_matched_text, output_path, output_files, output_extensions, correct_mode,
376
393
  correct_filepath, path_to_scan, excluded_path_without_dot, formats,
@@ -385,5 +402,38 @@ def run_scanners(
385
402
  return success, result_log.get(RESULT_KEY, ""), scan_item, license_list, scanoss_result
386
403
 
387
404
 
405
def metadata_collector(path_to_scan: str, excluded_files: set) -> Tuple[dict, dict]:
    """
    Collect metadata for merging.

    Walks ``path_to_scan`` once and, for every file that is not excluded,
    records the SPDX download locations found in it and, when the file is a
    manifest file, the licenses declared in it.

    :param path_to_scan: root directory to traverse.
    :param excluded_files: '/'-separated paths, relative to ``path_to_scan``,
        that must be skipped. ``None`` is treated as "nothing excluded".
    :return: (spdx_downloads, manifest_licenses)
        - spdx_downloads: {rel_path: [download_urls]}
        - manifest_licenses: {rel_path: [license_names]}
    """
    abs_path_to_scan = os.path.abspath(path_to_scan)
    # Tolerate a missing exclusion set instead of failing on the membership test.
    excluded = excluded_files or ()
    spdx_downloads = {}
    manifest_licenses = {}

    for root, _dirs, files in os.walk(path_to_scan):
        for file in files:
            file_path = os.path.join(root, file)
            # Keys are normalised to forward slashes so they compare equal on every platform.
            rel_path_file = os.path.relpath(file_path, abs_path_to_scan).replace('\\', '/')
            if rel_path_file in excluded:
                continue

            downloads = get_spdx_downloads(file_path)
            if downloads:
                spdx_downloads[rel_path_file] = downloads

            if is_manifest_file(file_path):
                licenses = get_manifest_licenses(file_path)
                if licenses:
                    manifest_licenses[rel_path_file] = licenses

    return spdx_downloads, manifest_licenses
436
+
437
+
388
438
  if __name__ == '__main__':
389
439
  main()
@@ -0,0 +1,251 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ # Copyright (c) 2025 LG Electronics Inc.
4
+ # SPDX-License-Identifier: Apache-2.0
5
+ import os
6
+ import json
7
+ import re
8
+ import logging
9
+ from fosslight_util.get_pom_license import get_license_from_pom
10
+ import fosslight_util.constant as constant
11
+
12
+ logger = logging.getLogger(constant.LOGGER_NAME)
13
+
14
+
15
+ def _split_spdx_expression(value: str) -> list[str]:
16
+ parts = re.split(r'\s+(?:OR|AND)\s+|[|]{2}|&&', value, flags=re.IGNORECASE)
17
+ tokens: list[str] = []
18
+ for part in parts:
19
+ token = part.strip().strip('()')
20
+ if token:
21
+ tokens.append(token)
22
+ unique: list[str] = []
23
+ for t in tokens:
24
+ if t not in unique:
25
+ unique.append(t)
26
+ return unique
27
+
28
+
29
def get_licenses_from_package_json(file_path: str) -> list[str]:
    """
    Read the licenses declared in a package.json file.

    The ``license`` field is looked at first: a string is split as a license
    expression, an object contributes its ``type`` value. Only when that
    yields nothing is the legacy ``licenses`` list (strings or objects with a
    ``type``) consulted. ``UNLICENSED`` values are ignored, and a
    ``SEE LICENSE IN ...`` string yields no licenses at all.

    :param file_path: path of the package.json file.
    :return: unique license names in order of appearance; empty when the file
        cannot be read/parsed or declares nothing usable.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            manifest = json.load(handle)
    except Exception as ex:
        logger.info(f"Failed to read package.json {file_path}: {ex}")
        return []

    if not isinstance(manifest, dict):
        return []

    def _usable(raw) -> str:
        # Stripped name, or '' for non-strings, blanks and the UNLICENSED marker.
        if not isinstance(raw, str):
            return ''
        name = raw.strip()
        return '' if name.upper() == 'UNLICENSED' else name

    found: list[str] = []
    declared = manifest.get('license')
    if isinstance(declared, str):
        text = declared.strip()
        marker = text.upper()
        if marker == 'UNLICENSED' or marker.startswith('SEE LICENSE IN'):
            return []
        found.extend(_split_spdx_expression(text))
    elif isinstance(declared, dict):
        name = _usable(declared.get('type'))
        if name:
            found.append(name)

    if not found:
        legacy = manifest.get('licenses')
        if isinstance(legacy, list):
            for entry in legacy:
                candidate = entry.get('type') if isinstance(entry, dict) else entry
                name = _usable(candidate)
                if name:
                    found.append(name)

    # Order-preserving de-duplication.
    return list(dict.fromkeys(found))
77
+
78
+
79
def get_licenses_from_setup_cfg(file_path: str) -> list[str]:
    """
    Read the ``license`` value of the ``[metadata]`` section of a setup.cfg.

    The file is first parsed with ``configparser``. If that yields no value
    (or fails), the raw text is scanned with regular expressions as a
    fallback; a value wrapped in matching quotes is unquoted there.

    :param file_path: path of the setup.cfg file.
    :return: license tokens of the declared expression, or an empty list when
        nothing is declared or the file cannot be processed.
    """
    try:
        import configparser
        cfg = configparser.ConfigParser()
        cfg.read(file_path, encoding='utf-8')
        if cfg.has_section('metadata'):
            declared = cfg.get('metadata', 'license', fallback='').strip()
            if declared:
                return _split_spdx_expression(declared)
    except Exception as ex:
        logger.info(f"Failed to parse setup.cfg with configparser for {file_path}: {ex}")

    # Fallback: locate the [metadata] block and its license line by regex.
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            text = handle.read()
        section = re.search(r'^\s*\[metadata\]\s*(.*?)(?=^\s*\[|\Z)', text, flags=re.MULTILINE | re.DOTALL)
        entry = section and re.search(r'^\s*license\s*=\s*(.+)$', section.group(1), flags=re.MULTILINE)
        if not entry:
            return []
        raw = entry.group(1).strip()
        is_quoted = len(raw) >= 2 and raw[0] in ('"', "'") and raw[-1] == raw[0]
        if is_quoted:
            raw = raw[1:-1].strip()
        return _split_spdx_expression(raw) if raw else []
    except Exception as ex:
        logger.info(f"Failed to parse setup.cfg {file_path} via regex fallback: {ex}")
        return []
110
+
111
+
112
def get_licenses_from_setup_py(file_path: str) -> list[str]:
    """
    Read the first quoted ``license=...`` value found in a setup.py file.

    The file is not executed; its text is searched with a regular expression.

    :param file_path: path of the setup.py file.
    :return: license tokens of the declared expression, or an empty list when
        the file cannot be read or no non-blank value is found.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            source_text = handle.read()
    except Exception as ex:
        logger.info(f"Failed to read setup.py {file_path}: {ex}")
        return []

    found = re.search(r'license\s*=\s*([\'"]{1,3})(.+?)\1', source_text, flags=re.IGNORECASE | re.DOTALL)
    declared = found.group(2).strip() if found else ''
    return _split_spdx_expression(declared) if declared else []
128
+
129
+
130
def get_licenses_from_podspec(file_path: str) -> list[str]:
    """
    Read the license declared in a CocoaPods podspec file.

    The text is searched, in this order, for: a quoted string assignment, a
    hash whose ``:type`` is a quoted string, a hash whose ``:type`` is a
    symbol, and a bare symbol assignment. The first attempt that produces a
    non-blank value wins.

    :param file_path: path of the podspec file.
    :return: license tokens of the declared value, or an empty list when the
        file cannot be read or no attempt matches.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            spec_text = handle.read()
    except Exception as ex:
        logger.info(f"Failed to read podspec {file_path}: {ex}")
        return []

    # (pattern, flags, capture group holding the license), highest priority first.
    attempts = (
        (r'\blicense\s*=\s*([\'"])(.+?)\1', re.IGNORECASE, 2),
        (r'\blicense\s*=\s*\{[^}]*?:type\s*=>\s*([\'"])(.+?)\1', re.IGNORECASE | re.DOTALL, 2),
        (r'\blicense\s*=\s*\{[^}]*?:type\s*=>\s*:(\w+)', re.IGNORECASE | re.DOTALL, 1),
        (r'\blicense\s*=\s*:(\w+)', re.DOTALL | re.IGNORECASE, 1),
    )
    for pattern, flags, group_no in attempts:
        hit = re.search(pattern, spec_text, flags=flags)
        if hit is None:
            continue
        declared = hit.group(group_no).strip()
        if declared:
            return _split_spdx_expression(declared)

    return []
163
+
164
+
165
def get_licenses_from_cargo_toml(file_path: str) -> list[str]:
    """
    Read the ``license`` value of the ``[package]`` table of a Cargo.toml.

    A TOML parser is tried first (``tomllib``, then ``tomli``). When neither
    produces a result, or the parsed data has no usable ``license``, the raw
    text is scanned with regular expressions instead. A manifest that only
    declares ``license-file`` yields no licenses.

    :param file_path: path of the Cargo.toml file.
    :return: license tokens of the declared expression, or an empty list.
    """
    def _read_toml(loader):
        # TOML loaders require a binary file object.
        with open(file_path, 'rb') as fh:
            return loader.load(fh)

    try:
        try:
            import tomllib  # Python 3.11+
            manifest = _read_toml(tomllib)
        except Exception:
            try:
                import tomli  # Backport
                manifest = _read_toml(tomli)
            except Exception:
                manifest = None

        if isinstance(manifest, dict):
            package_tbl = manifest.get('package') or {}
            declared = package_tbl.get('license')
            if isinstance(declared, str) and declared.strip():
                return _split_spdx_expression(declared.strip())
            if package_tbl.get('license-file'):
                return []
    except Exception as ex:
        logger.info(f"Failed to parse Cargo.toml via toml parser for {file_path}: {ex}")

    # Fallback: regex scan of the [package] block.
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            text = handle.read()
        package_block = re.search(r'^\s*\[package\]\s*(.*?)(?=^\s*\[|\Z)', text, flags=re.MULTILINE | re.DOTALL)
        if package_block is None:
            return []
        body = package_block.group(1)
        license_hit = re.search(r'^\s*license\s*=\s*(?P<q>"""|\'\'\'|"|\')(?P<val>.*?)(?P=q)', body,
                                flags=re.MULTILINE | re.DOTALL)
        if license_hit:
            declared = license_hit.group('val').strip()
            if declared:
                return _split_spdx_expression(declared)
        license_file_hit = re.search(r'^\s*license-file\s*=\s*(?:"""|\'\'\'|"|\')(.*?)(?:"""|\'\'\'|"|\')', body,
                                     flags=re.MULTILINE | re.DOTALL)
        if license_file_hit:
            # Only a license file is referenced; there is no expression to report.
            return []
    except Exception as ex:
        logger.info(f"Failed to parse Cargo.toml {file_path}: {ex}")
        return []
    return []
210
+
211
+
212
def get_manifest_licenses(file_path: str) -> list[str]:
    """
    Extract the licenses declared in a package manifest file.

    The extractor is chosen from the file name: ``*.pom`` (case-sensitive
    suffix on the full path), then, compared case-insensitively on the base
    name, ``package.json``, ``setup.cfg``, ``setup.py``, ``*.podspec`` and
    ``Cargo.toml``. Any failure is logged and reported as "no licenses".

    :param file_path: path of the manifest file.
    :return: declared license names; an empty list when the file type is not
        supported, nothing is declared, or extraction fails.
    """
    if file_path.endswith('.pom'):
        try:
            pom_licenses = get_license_from_pom(group_id='', artifact_id='', version='',
                                                pom_path=file_path, check_parent=True)
            if not pom_licenses:
                return []
            # The result is handled as a single ', '-separated string of license names.
            return [x.strip() for x in pom_licenses.split(', ') if x.strip()]
        except Exception as ex:
            logger.info(f"Failed to extract license from POM {file_path}: {ex}")
            return []

    base_name = os.path.basename(file_path).lower()
    if base_name == 'package.json':
        label, extractor = 'package.json', get_licenses_from_package_json
    elif base_name == 'setup.cfg':
        label, extractor = 'setup.cfg', get_licenses_from_setup_cfg
    elif base_name == 'setup.py':
        label, extractor = 'setup.py', get_licenses_from_setup_py
    elif base_name.endswith('.podspec'):
        label, extractor = 'podspec', get_licenses_from_podspec
    elif base_name == 'cargo.toml':
        label, extractor = 'Cargo.toml', get_licenses_from_cargo_toml
    else:
        # Unsupported file type: honour the declared list return type
        # instead of implicitly returning None.
        return []

    try:
        return extractor(file_path)
    except Exception as ex:
        logger.info(f"Failed to extract license from {label} {file_path}: {ex}")
        return []
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ # Copyright (c) 2023 LG Electronics Inc.
4
+ # SPDX-License-Identifier: Apache-2.0
5
+
6
+ import os
7
+ import logging
8
+ import re
9
+ import fosslight_util.constant as constant
10
+ import mmap
11
+
12
+ logger = logging.getLogger(constant.LOGGER_NAME)
13
+
14
+
15
def get_spdx_downloads(file_path: str) -> list[str]:
    """
    Collect the ``SPDX-PackageDownloadLocation`` values found in a file.

    The tag is matched case-insensitively on the raw bytes of the file and
    each value (the first run of non-whitespace after the colon) is decoded
    as UTF-8. Failures are logged as warnings; values collected before the
    failure are still returned.

    :param file_path: path of the file to scan.
    :return: download locations in order of appearance; empty when the file
        is empty, contains no tag, or cannot be read.
    """
    results = []
    find_word = re.compile(rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)", re.IGNORECASE)
    try:
        # An empty file cannot be memory-mapped, so it is skipped up front.
        if os.path.getsize(file_path) > 0:
            # Binary mode: the content is only ever read as bytes through mmap.
            with open(file_path, "rb") as f:
                with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj:
                    for word in find_word.findall(mmap_obj):
                        results.append(word.decode('utf-8'))
    except Exception as ex:
        logger.warning(f"Failed to extract SPDX download location. {file_path}, {ex}")
    return results
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fosslight_source
3
- Version: 2.2.1
3
+ Version: 2.2.2
4
4
  Summary: FOSSLight Source Scanner
5
5
  Home-page: https://github.com/fosslight/fosslight_source_scanner
6
6
  Download-URL: https://github.com/fosslight/fosslight_source_scanner
@@ -10,6 +10,7 @@ src/fosslight_source/_parsing_scancode_file_item.py
10
10
  src/fosslight_source/_parsing_scanoss_file.py
11
11
  src/fosslight_source/_scan_item.py
12
12
  src/fosslight_source/cli.py
13
+ src/fosslight_source/run_manifest_extractor.py
13
14
  src/fosslight_source/run_scancode.py
14
15
  src/fosslight_source/run_scanoss.py
15
16
  src/fosslight_source/run_spdx_extractor.py
@@ -1,37 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
- # Copyright (c) 2023 LG Electronics Inc.
4
- # SPDX-License-Identifier: Apache-2.0
5
-
6
- import os
7
- import logging
8
- import re
9
- import fosslight_util.constant as constant
10
- import mmap
11
-
12
- logger = logging.getLogger(constant.LOGGER_NAME)
13
-
14
-
15
- def get_spdx_downloads(path_to_scan: str, path_to_exclude: set = None) -> dict:
16
- download_dict = {}
17
- find_word = re.compile(rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)", re.IGNORECASE)
18
- abs_path_to_scan = os.path.abspath(path_to_scan)
19
-
20
- for root, dirs, files in os.walk(path_to_scan):
21
- for file in files:
22
- file_path = os.path.join(root, file)
23
- rel_path_file = os.path.relpath(file_path, abs_path_to_scan).replace('\\', '/')
24
- if rel_path_file in path_to_exclude:
25
- continue
26
- try:
27
- if os.path.getsize(file_path) > 0:
28
- with open(file_path, "r") as f:
29
- with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj:
30
- for word in find_word.findall(mmap_obj):
31
- if rel_path_file in download_dict:
32
- download_dict[rel_path_file].append(word.decode('utf-8'))
33
- else:
34
- download_dict[rel_path_file] = [word.decode('utf-8')]
35
- except Exception as ex:
36
- logger.warning(f"Failed to extract SPDX download location. {rel_path_file}, {ex}")
37
- return download_dict