fosslight-source 2.1.18__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fosslight_source/_help.py CHANGED
@@ -27,7 +27,7 @@ _HELP_MESSAGE_SOURCE_SCANNER = f"""
27
27
  \t\t\t ({', '.join(SUPPORT_FORMAT)})
28
28
  \t\t\t Multiple formats can be specified separated by space.
29
29
  Options only for FOSSLight Source Scanner
30
- -s <scanner>\t Select which scanner to be run (scancode, scanoss, all)
30
+ -s <scanner>\t Select which scanner to be run (scancode, scanoss, kb, all)
31
31
  -j\t\t\t Generate raw result of scanners in json format
32
32
  -t <float>\t\t Stop scancode scanning if scanning takes longer than a timeout in seconds.
33
33
  -c <core>\t\t Select the number of cores to be scanned with ScanCode or threads with SCANOSS.
fosslight_source/_parsing_scancode_file_item.py CHANGED
@@ -29,6 +29,14 @@ SPDX_REPLACE_WORDS = ["(", ")"]
29
29
  KEY_AND = r"(?<=\s)and(?=\s)"
30
30
  KEY_OR = r"(?<=\s)or(?=\s)"
31
31
  GPL_LICENSE_PATTERN = r'((a|l)?gpl|gfdl)' # GPL, LGPL, AGPL, GFDL
32
+ SOURCE_EXTENSIONS = [
33
+ '.java', '.cpp', '.c', '.cc', '.cxx', '.c++', '.h', '.hh', '.hpp', '.hxx', '.h++',
34
+ '.cs', '.py', '.pyw', '.js', '.jsx', '.mjs', '.cjs', '.ts', '.tsx',
35
+ '.go', '.rs', '.rb', '.php', '.swift', '.kt', '.kts', '.scala', '.sc',
36
+ '.m', '.mm', '.dart', '.lua', '.pl', '.pm', '.r', '.R',
37
+ '.hs', '.clj', '.cljs', '.ex', '.exs', '.groovy', '.gradle',
38
+ '.vue', '.svelte', '.asm', '.s', '.i', '.ii'
39
+ ]
32
40
 
33
41
 
34
42
  def is_gpl_family_license(licenses: list) -> bool:
@@ -328,7 +336,11 @@ def parsing_scancode_32_later(
328
336
  result_item.licenses = license_detected
329
337
 
330
338
  result_item.exclude = is_exclude_file(file_path)
331
- result_item.is_license_text = file.get("percentage_of_license_text", 0) > 90 or is_notice_file(file_path)
339
+ file_ext = os.path.splitext(file_path)[1].lower()
340
+ is_source_file = file_ext and file_ext in SOURCE_EXTENSIONS
341
+ result_item.is_license_text = is_notice_file(file_path) or (
342
+ file.get("percentage_of_license_text", 0) > 90 and not is_source_file
343
+ )
332
344
 
333
345
  detected_without_pom = []
334
346
  if is_manifest_file(file_path) and len(license_detected) > 0:
@@ -371,7 +383,7 @@ def parsing_scancode_32_later(
371
383
  license_expression = file.get("detected_license_expression", "")
372
384
  if license_expression_spdx:
373
385
  license_expression = license_expression_spdx
374
- if license_expression:
386
+ if license_expression and "OR" in license_expression:
375
387
  result_item.comment = license_expression
376
388
 
377
389
  scancode_file_item.append(result_item)
fosslight_source/_scan_item.py CHANGED
@@ -6,6 +6,10 @@
6
6
  import os
7
7
  import logging
8
8
  import re
9
+ import json
10
+ import hashlib
11
+ import urllib.request
12
+ import urllib.error
9
13
  import fosslight_util.constant as constant
10
14
  from fosslight_util.oss_item import FileItem, OssItem, get_checksum_sha1
11
15
 
@@ -26,6 +30,7 @@ _package_directory = ["node_modules", "venv", "Pods", "Carthage"]
26
30
  MAX_LICENSE_LENGTH = 200
27
31
  MAX_LICENSE_TOTAL_LENGTH = 600
28
32
  SUBSTRING_LICENSE_COMMENT = "Maximum character limit (License)"
33
+ KB_URL = "http://fosslight-kb.lge.com/query"
29
34
 
30
35
 
31
36
  class SourceItem(FileItem):
@@ -77,7 +82,90 @@ class SourceItem(FileItem):
77
82
  else:
78
83
  self._licenses = value
79
84
 
80
- def set_oss_item(self) -> None:
85
+ def _get_md5_hash(self, path_to_scan: str = "") -> str:
86
+ try:
87
+ file_path = self.source_name_or_path
88
+ if path_to_scan and not os.path.isabs(file_path):
89
+ file_path = os.path.join(path_to_scan, file_path)
90
+ file_path = os.path.normpath(file_path)
91
+
92
+ if os.path.isfile(file_path):
93
+ md5_hash = hashlib.md5()
94
+ with open(file_path, "rb") as f:
95
+ for chunk in iter(lambda: f.read(4096), b""):
96
+ md5_hash.update(chunk)
97
+ return md5_hash.hexdigest()
98
+ except FileNotFoundError:
99
+ logger.warning(f"File not found: {self.source_name_or_path}")
100
+ except PermissionError:
101
+ logger.warning(f"Permission denied: {self.source_name_or_path}")
102
+ except Exception as e:
103
+ logger.warning(f"Failed to compute MD5 for {self.source_name_or_path}: {e}")
104
+ return ""
105
+
106
+ def _get_origin_url_from_md5_hash(self, md5_hash: str) -> str:
107
+ try:
108
+ request = urllib.request.Request(KB_URL, data=json.dumps({"file_hash": md5_hash}).encode('utf-8'), method='POST')
109
+ request.add_header('Accept', 'application/json')
110
+ request.add_header('Content-Type', 'application/json')
111
+
112
+ with urllib.request.urlopen(request, timeout=10) as response:
113
+ data = json.loads(response.read().decode())
114
+ if isinstance(data, dict):
115
+ # Only extract output if return_code is 0 (success)
116
+ return_code = data.get('return_code', -1)
117
+ if return_code == 0:
118
+ output = data.get('output', '')
119
+ if output:
120
+ return output
121
+ except urllib.error.URLError as e:
122
+ logger.warning(f"Failed to fetch origin_url from API for MD5 hash {md5_hash}: {e}")
123
+ except json.JSONDecodeError as e:
124
+ logger.warning(f"Failed to parse API response for MD5 hash {md5_hash}: {e}")
125
+ except Exception as e:
126
+ logger.warning(f"Error getting origin_url for MD5 hash {md5_hash}: {e}")
127
+ return ""
128
+
129
+ def _extract_oss_info_from_url(self, url: str) -> tuple:
130
+ """
131
+ Extract OSS name, version, and repository URL from GitHub URL.
132
+
133
+ Supported patterns:
134
+ - https://github.com/{owner}/{repo}/archive/{version}.zip
135
+ - https://github.com/{owner}/{repo}/archive/{tag}/{version}.zip
136
+ - https://github.com/{owner}/{repo}/releases/download/{version}/{filename}
137
+
138
+ :param url: GitHub URL to extract information from
139
+ :return: tuple of (repo_name, version, repo_url)
140
+ """
141
+ try:
142
+ repo_match = re.search(r'github\.com/([^/]+)/([^/]+)/', url)
143
+ if not repo_match:
144
+ return "", "", ""
145
+
146
+ owner = repo_match.group(1)
147
+ repo_name = repo_match.group(2)
148
+ repo_url = f"https://github.com/{owner}/{repo_name}"
149
+ version = ""
150
+ # Extract version from releases pattern first: /releases/download/{version}/
151
+ releases_match = re.search(r'/releases/download/([^/]+)/', url)
152
+ if releases_match:
153
+ version = releases_match.group(1)
154
+ else:
155
+ # Extract version from archive pattern: /archive/{version}.zip or /archive/{tag}/{version}.zip
156
+ # For pattern with tag, take the last segment before .zip
157
+ archive_match = re.search(r'/archive/(.+?)(?:\.zip|\.tar\.gz)?(?:[?#]|$)', url)
158
+ if archive_match:
159
+ version_path = archive_match.group(1)
160
+ version = version_path.split('/')[-1] if '/' in version_path else version_path
161
+ if re.match(r'^[0-9a-f]{7,40}$', version, re.IGNORECASE):
162
+ version = ""
163
+ return repo_name, version, repo_url
164
+ except Exception as e:
165
+ logger.debug(f"Failed to extract OSS info from URL {url}: {e}")
166
+ return "", "", ""
167
+
168
+ def set_oss_item(self, path_to_scan: str = "", run_kb: bool = False) -> None:
81
169
  self.oss_items = []
82
170
  if self.download_location:
83
171
  for url in self.download_location:
@@ -87,6 +175,20 @@ class SourceItem(FileItem):
87
175
  self.oss_items.append(item)
88
176
  else:
89
177
  item = OssItem(self.oss_name, self.oss_version, self.licenses)
178
+ if run_kb and not self.is_license_text:
179
+ md5_hash = self._get_md5_hash(path_to_scan)
180
+ if md5_hash:
181
+ origin_url = self._get_origin_url_from_md5_hash(md5_hash)
182
+ if origin_url:
183
+ extracted_name, extracted_version, repo_url = self._extract_oss_info_from_url(origin_url)
184
+ if extracted_name:
185
+ self.oss_name = extracted_name
186
+ if extracted_version:
187
+ self.oss_version = extracted_version
188
+ download_url = repo_url if repo_url else origin_url
189
+ self.download_location = [download_url]
190
+ item = OssItem(self.oss_name, self.oss_version, self.licenses, download_url)
191
+
90
192
  item.copyright = "\n".join(self.copyright)
91
193
  item.comment = self.comment
92
194
  self.oss_items.append(item)
@@ -165,6 +267,15 @@ def is_package_dir(dir_path: str) -> bool:
165
267
  return False, ""
166
268
 
167
269
 
270
+ def _has_parent_in_exclude_list(rel_path: str, path_to_exclude: list) -> bool:
271
+ path_parts = rel_path.replace('\\', '/').split('/')
272
+ for i in range(1, len(path_parts)):
273
+ parent_path = '/'.join(path_parts[:i])
274
+ if parent_path in path_to_exclude:
275
+ return True
276
+ return False
277
+
278
+
168
279
  def get_excluded_paths(path_to_scan: str, custom_excluded_paths: list = []) -> list:
169
280
  path_to_exclude = custom_excluded_paths.copy()
170
281
  abs_path_to_scan = os.path.abspath(path_to_scan)
@@ -173,11 +284,10 @@ def get_excluded_paths(path_to_scan: str, custom_excluded_paths: list = []) -> l
173
284
  for dir_name in dirs:
174
285
  dir_path = os.path.join(root, dir_name)
175
286
  rel_path = os.path.relpath(dir_path, abs_path_to_scan)
176
- if dir_name in _package_directory:
177
- if rel_path not in path_to_exclude:
287
+ if not _has_parent_in_exclude_list(rel_path, path_to_exclude):
288
+ if dir_name in _package_directory:
178
289
  path_to_exclude.append(rel_path)
179
- elif is_exclude_dir(rel_path):
180
- if rel_path not in path_to_exclude:
290
+ elif is_exclude_dir(rel_path):
181
291
  path_to_exclude.append(rel_path)
182
292
 
183
293
  return path_to_exclude
fosslight_source/cli.py CHANGED
@@ -8,6 +8,8 @@ import os
8
8
  import platform
9
9
  import warnings
10
10
  import logging
11
+ import urllib.request
12
+ import urllib.error
11
13
  from datetime import datetime
12
14
  import fosslight_util.constant as constant
13
15
  from fosslight_util.set_log import init_log
@@ -24,7 +26,7 @@ from .run_scanoss import get_scanoss_extra_info
24
26
  import yaml
25
27
  import argparse
26
28
  from .run_spdx_extractor import get_spdx_downloads
27
- from ._scan_item import SourceItem
29
+ from ._scan_item import SourceItem, KB_URL
28
30
  from fosslight_util.oss_item import ScannerItem
29
31
  from typing import Tuple
30
32
 
@@ -35,7 +37,7 @@ SCANOSS_HEADER = {SRC_SHEET_NAME: ['ID', 'Source Path', 'OSS Name',
35
37
  MERGED_HEADER = {SRC_SHEET_NAME: ['ID', 'Source Path', 'OSS Name',
36
38
  'OSS Version', 'License', 'Download Location',
37
39
  'Homepage', 'Copyright Text', 'Exclude', 'Comment', 'license_reference']}
38
- SCANNER_TYPE = ['scancode', 'scanoss', 'all', '']
40
+ SCANNER_TYPE = ['kb', 'scancode', 'scanoss', 'all']
39
41
 
40
42
  logger = logging.getLogger(constant.LOGGER_NAME)
41
43
  warnings.filterwarnings("ignore", category=FutureWarning)
@@ -269,17 +271,35 @@ def create_report_file(
269
271
  return scan_item
270
272
 
271
273
 
272
- def merge_results(scancode_result: list = [], scanoss_result: list = [], spdx_downloads: dict = {}) -> list:
274
+ def check_kb_server_reachable() -> bool:
275
+ try:
276
+ request = urllib.request.Request(KB_URL, method='HEAD')
277
+ with urllib.request.urlopen(request, timeout=5) as response:
278
+ logger.debug(f"KB server is reachable. Response status: {response.status}")
279
+ return response.status != 404
280
+ except urllib.error.HTTPError as e:
281
+ return e.code != 404
282
+ except urllib.error.URLError:
283
+ return False
284
+ except Exception:
285
+ return False
286
+
287
+
288
+ def merge_results(
289
+ scancode_result: list = [], scanoss_result: list = [], spdx_downloads: dict = {},
290
+ path_to_scan: str = "", run_kb: bool = False
291
+ ) -> list:
273
292
 
274
293
  """
275
294
  Merge scanner results and spdx parsing result.
276
295
  :param scancode_result: list of scancode results in SourceItem.
277
296
  :param scanoss_result: list of scanoss results in SourceItem.
278
297
  :param spdx_downloads: dictionary of spdx parsed results.
298
+ :param path_to_scan: path to the scanned directory for constructing absolute file paths.
299
+ :param run_kb: if True, load kb result.
279
300
  :return merged_result: list of merged result in SourceItem.
280
301
  """
281
302
 
282
- # If anything that is found at SCANOSS only exist, add it to result.
283
303
  scancode_result.extend([item for item in scanoss_result if item not in scancode_result])
284
304
 
285
305
  # If download loc. in SPDX form found, overwrite the scanner result.
@@ -293,9 +313,15 @@ def merge_results(scancode_result: list = [], scanoss_result: list = [], spdx_do
293
313
  new_result_item = SourceItem(file_name)
294
314
  new_result_item.download_location = download_location
295
315
  scancode_result.append(new_result_item)
316
+ if run_kb and not check_kb_server_reachable():
317
+ run_kb = False
318
+ if run_kb:
319
+ logger.info("KB server is reachable. Loading data from OSS KB.")
320
+ else:
321
+ logger.info("Skipping KB lookup.")
296
322
 
297
323
  for item in scancode_result:
298
- item.set_oss_item()
324
+ item.set_oss_item(path_to_scan, run_kb)
299
325
 
300
326
  return scancode_result
301
327
 
@@ -347,18 +373,22 @@ def run_scanners(
347
373
 
348
374
  if success:
349
375
  excluded_path_with_default_exclusion = get_excluded_paths(path_to_scan, path_to_exclude)
350
- if selected_scanner == 'scancode' or selected_scanner == 'all' or selected_scanner == '':
376
+ logger.debug(f"Skipped paths: {excluded_path_with_default_exclusion}")
377
+ if not selected_scanner:
378
+ selected_scanner = 'all'
379
+ if selected_scanner in ['scancode', 'all', 'kb']:
351
380
  success, result_log[RESULT_KEY], scancode_result, license_list = run_scan(path_to_scan, output_file_name,
352
381
  write_json_file, num_cores, True,
353
382
  print_matched_text, formats, called_by_cli,
354
383
  time_out, correct_mode, correct_filepath,
355
384
  excluded_path_with_default_exclusion)
356
- if selected_scanner == 'scanoss' or selected_scanner == 'all' or selected_scanner == '':
385
+ if selected_scanner in ['scanoss', 'all']:
357
386
  scanoss_result, api_limit_exceed = run_scanoss_py(path_to_scan, output_file_name, formats, True, write_json_file,
358
387
  num_cores, excluded_path_with_default_exclusion)
359
388
  if selected_scanner in SCANNER_TYPE:
389
+ run_kb = True if selected_scanner in ['kb', 'all'] else False
360
390
  spdx_downloads = get_spdx_downloads(path_to_scan, excluded_path_with_default_exclusion)
361
- merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads)
391
+ merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads, path_to_scan, run_kb)
362
392
  scan_item = create_report_file(start_time, merged_result, license_list, scanoss_result, selected_scanner,
363
393
  print_matched_text, output_path, output_files, output_extensions, correct_mode,
364
394
  correct_filepath, path_to_scan, path_to_exclude, formats, excluded_file_list,
fosslight_source/run_scancode.py CHANGED
@@ -74,7 +74,6 @@ def run_scan(
74
74
  if os.path.isdir(path_to_scan):
75
75
  try:
76
76
  time_out = float(time_out)
77
- logger.debug(f"Skipped by Scancode: {path_to_exclude}")
78
77
  pretty_params = {}
79
78
  pretty_params["path_to_scan"] = path_to_scan
80
79
  pretty_params["path_to_exclude"] = path_to_exclude
fosslight_source-2.1.18.dist-info/METADATA → fosslight_source-2.2.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fosslight_source
3
- Version: 2.1.18
3
+ Version: 2.2.0
4
4
  Summary: FOSSLight Source Scanner
5
5
  Home-page: https://github.com/fosslight/fosslight_source_scanner
6
6
  Download-URL: https://github.com/fosslight/fosslight_source_scanner
@@ -17,7 +17,7 @@ License-File: LICENSE
17
17
  Requires-Dist: pyparsing
18
18
  Requires-Dist: scanoss>=1.18.0
19
19
  Requires-Dist: XlsxWriter
20
- Requires-Dist: fosslight_util>=2.1.30
20
+ Requires-Dist: fosslight_util>=2.1.31
21
21
  Requires-Dist: PyYAML
22
22
  Requires-Dist: wheel>=0.38.1
23
23
  Requires-Dist: intbitset
fosslight_source-2.2.0.dist-info/RECORD ADDED
@@ -0,0 +1,16 @@
1
+ fosslight_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ fosslight_source/_help.py,sha256=Ge6g9GKWGza11E74PFnBMqsj40UCUz-a_xArDZ1FClU,2316
3
+ fosslight_source/_license_matched.py,sha256=-3H881XQjFDafRttBsuboS3VbCPYEvPH1pwWXptknE4,2164
4
+ fosslight_source/_parsing_scancode_file_item.py,sha256=JjFm1rYzFnV7VOO0lyY32qfID9UlOwNCUfhxABgG6Ng,20692
5
+ fosslight_source/_parsing_scanoss_file.py,sha256=0f5JzjnFU-kcPZRX7OKnextyvANjKwwNZeyCJVC7eME,4624
6
+ fosslight_source/_scan_item.py,sha256=mPNdVdVagiPI4YlL0Nu656nU5yvWTdZTKR2SxXA8l1g,12612
7
+ fosslight_source/cli.py,sha256=x3z8NuiU7hAX8rWHqeEyAtMn5Rtw6dGk5UjvGGfIjl8,17902
8
+ fosslight_source/run_scancode.py,sha256=BcMzUEoHX4ukvj9Z2mZX-6HaY2yKk_AvQHeDzS6REPg,8934
9
+ fosslight_source/run_scanoss.py,sha256=8wu3sa-YBqjfb5x2dbDJuAdw3rrExueOW23WdzqDCaU,5721
10
+ fosslight_source/run_spdx_extractor.py,sha256=Hr9sTv06cJaVITy8amwexIW2FV8_rUcFw6hKmR9ZYws,1990
11
+ fosslight_source-2.2.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
12
+ fosslight_source-2.2.0.dist-info/METADATA,sha256=1vWU_HiBNhGVBpjym9Mz8jKf8opyCXZRPAHg1qzUy3E,3557
13
+ fosslight_source-2.2.0.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
14
+ fosslight_source-2.2.0.dist-info/entry_points.txt,sha256=G4bBRWqSrJ68g-2M-JtNDrSZsdym_M7_KohQ2qR1vG8,113
15
+ fosslight_source-2.2.0.dist-info/top_level.txt,sha256=C2vw-0OIent84Vq-UEk1gt_kK1EL8dIItzBzp3WNyA4,17
16
+ fosslight_source-2.2.0.dist-info/RECORD,,
fosslight_source-2.1.18.dist-info/RECORD REMOVED
@@ -1,16 +0,0 @@
1
- fosslight_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- fosslight_source/_help.py,sha256=QuoQvxBPV00IfuD2ft88uRQXMSdrL2rJB7CQr05w3Ng,2312
3
- fosslight_source/_license_matched.py,sha256=-3H881XQjFDafRttBsuboS3VbCPYEvPH1pwWXptknE4,2164
4
- fosslight_source/_parsing_scancode_file_item.py,sha256=VZf_-5f7DZi8Zkj9Bx9LETTxcW-9f0KyNQD_DVOUNes,20024
5
- fosslight_source/_parsing_scanoss_file.py,sha256=0f5JzjnFU-kcPZRX7OKnextyvANjKwwNZeyCJVC7eME,4624
6
- fosslight_source/_scan_item.py,sha256=5HWJ8j58snEjTqzYtKRB8RVfywVrzivkJQ6WMh7nBwA,7299
7
- fosslight_source/cli.py,sha256=ApdTDaLEuH1LskLtcMrLyeRDIgIJUZlOp3RrWaju2Pc,16854
8
- fosslight_source/run_scancode.py,sha256=h8HWoZr5R17kXCYjiR56ZTdpDwpFKPAurpUpjTvT424,9006
9
- fosslight_source/run_scanoss.py,sha256=8wu3sa-YBqjfb5x2dbDJuAdw3rrExueOW23WdzqDCaU,5721
10
- fosslight_source/run_spdx_extractor.py,sha256=Hr9sTv06cJaVITy8amwexIW2FV8_rUcFw6hKmR9ZYws,1990
11
- fosslight_source-2.1.18.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
12
- fosslight_source-2.1.18.dist-info/METADATA,sha256=Iks42aiZ_NvP3ip3923gmbO5k_tZr9r0Tt0W8FZBwV0,3558
13
- fosslight_source-2.1.18.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
14
- fosslight_source-2.1.18.dist-info/entry_points.txt,sha256=G4bBRWqSrJ68g-2M-JtNDrSZsdym_M7_KohQ2qR1vG8,113
15
- fosslight_source-2.1.18.dist-info/top_level.txt,sha256=C2vw-0OIent84Vq-UEk1gt_kK1EL8dIItzBzp3WNyA4,17
16
- fosslight_source-2.1.18.dist-info/RECORD,,