fosslight-source 2.1.19__py3-none-any.whl → 2.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fosslight_source/_help.py +1 -1
- fosslight_source/_parsing_scancode_file_item.py +15 -31
- fosslight_source/_parsing_scanoss_file.py +4 -18
- fosslight_source/_scan_item.py +103 -73
- fosslight_source/cli.py +54 -42
- fosslight_source/run_scancode.py +33 -32
- fosslight_source/run_scanoss.py +9 -22
- fosslight_source/run_spdx_extractor.py +18 -31
- {fosslight_source-2.1.19.dist-info → fosslight_source-2.2.1.dist-info}/METADATA +2 -2
- fosslight_source-2.2.1.dist-info/RECORD +16 -0
- fosslight_source-2.1.19.dist-info/RECORD +0 -16
- {fosslight_source-2.1.19.dist-info → fosslight_source-2.2.1.dist-info}/WHEEL +0 -0
- {fosslight_source-2.1.19.dist-info → fosslight_source-2.2.1.dist-info}/entry_points.txt +0 -0
- {fosslight_source-2.1.19.dist-info → fosslight_source-2.2.1.dist-info}/licenses/LICENSE +0 -0
- {fosslight_source-2.1.19.dist-info → fosslight_source-2.2.1.dist-info}/top_level.txt +0 -0
fosslight_source/_help.py
CHANGED
|
@@ -27,7 +27,7 @@ _HELP_MESSAGE_SOURCE_SCANNER = f"""
|
|
|
27
27
|
\t\t\t ({', '.join(SUPPORT_FORMAT)})
|
|
28
28
|
\t\t\t Multiple formats can be specified separated by space.
|
|
29
29
|
Options only for FOSSLight Source Scanner
|
|
30
|
-
-s <scanner>\t Select which scanner to be run (scancode, scanoss, all)
|
|
30
|
+
-s <scanner>\t Select which scanner to be run (scancode, scanoss, kb, all)
|
|
31
31
|
-j\t\t\t Generate raw result of scanners in json format
|
|
32
32
|
-t <float>\t\t Stop scancode scanning if scanning takes longer than a timeout in seconds.
|
|
33
33
|
-c <core>\t\t Select the number of cores to be scanned with ScanCode or threads with SCANOSS.
|
|
fosslight_source/_parsing_scancode_file_item.py
CHANGED
|
@@ -10,12 +10,9 @@ import fosslight_util.constant as constant
|
|
|
10
10
|
from fosslight_util.get_pom_license import get_license_from_pom
|
|
11
11
|
from ._license_matched import MatchedLicense
|
|
12
12
|
from ._scan_item import SourceItem
|
|
13
|
-
from ._scan_item import is_exclude_dir
|
|
14
|
-
from ._scan_item import is_exclude_file
|
|
15
13
|
from ._scan_item import replace_word
|
|
16
14
|
from ._scan_item import is_notice_file
|
|
17
15
|
from ._scan_item import is_manifest_file
|
|
18
|
-
from ._scan_item import is_package_dir
|
|
19
16
|
from typing import Tuple
|
|
20
17
|
|
|
21
18
|
logger = logging.getLogger(constant.LOGGER_NAME)
|
|
@@ -29,6 +26,14 @@ SPDX_REPLACE_WORDS = ["(", ")"]
|
|
|
29
26
|
KEY_AND = r"(?<=\s)and(?=\s)"
|
|
30
27
|
KEY_OR = r"(?<=\s)or(?=\s)"
|
|
31
28
|
GPL_LICENSE_PATTERN = r'((a|l)?gpl|gfdl)' # GPL, LGPL, AGPL, GFDL
|
|
29
|
+
SOURCE_EXTENSIONS = [
|
|
30
|
+
'.java', '.cpp', '.c', '.cc', '.cxx', '.c++', '.h', '.hh', '.hpp', '.hxx', '.h++',
|
|
31
|
+
'.cs', '.py', '.pyw', '.js', '.jsx', '.mjs', '.cjs', '.ts', '.tsx',
|
|
32
|
+
'.go', '.rs', '.rb', '.php', '.swift', '.kt', '.kts', '.scala', '.sc',
|
|
33
|
+
'.m', '.mm', '.dart', '.lua', '.pl', '.pm', '.r', '.R',
|
|
34
|
+
'.hs', '.clj', '.cljs', '.ex', '.exs', '.groovy', '.gradle',
|
|
35
|
+
'.vue', '.svelte', '.asm', '.s', '.i', '.ii'
|
|
36
|
+
]
|
|
32
37
|
|
|
33
38
|
|
|
34
39
|
def is_gpl_family_license(licenses: list) -> bool:
|
|
@@ -75,8 +80,6 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
|
|
|
75
80
|
msg = []
|
|
76
81
|
scancode_file_item = []
|
|
77
82
|
license_list = {} # Key :[license]+[matched_text], value: MatchedLicense()
|
|
78
|
-
prev_dir = ""
|
|
79
|
-
prev_dir_value = False
|
|
80
83
|
|
|
81
84
|
if scancode_file_list:
|
|
82
85
|
for file in scancode_file_list:
|
|
@@ -88,22 +91,11 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
|
|
|
88
91
|
is_binary = file.get("is_binary", False)
|
|
89
92
|
if "type" in file:
|
|
90
93
|
is_dir = file["type"] == "directory"
|
|
91
|
-
if is_dir:
|
|
92
|
-
prev_dir_value = is_exclude_dir(file_path)
|
|
93
|
-
prev_dir = file_path
|
|
94
|
-
|
|
95
94
|
if not is_binary and not is_dir:
|
|
96
95
|
licenses = file.get("licenses", [])
|
|
97
96
|
copyright_list = file.get("copyrights", [])
|
|
98
97
|
|
|
99
98
|
result_item = SourceItem(file_path)
|
|
100
|
-
is_pkg, pkg_path = is_package_dir(os.path.dirname(file_path))
|
|
101
|
-
if is_pkg:
|
|
102
|
-
result_item.source_name_or_path = pkg_path
|
|
103
|
-
if not any(x.source_name_or_path == result_item.source_name_or_path for x in scancode_file_item):
|
|
104
|
-
result_item.exclude = True
|
|
105
|
-
scancode_file_item.append(result_item)
|
|
106
|
-
continue
|
|
107
99
|
|
|
108
100
|
if has_error and "scan_errors" in file:
|
|
109
101
|
error_msg = file.get("scan_errors", [])
|
|
@@ -230,8 +222,6 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
|
|
|
230
222
|
set(license_expression_list))
|
|
231
223
|
result_item.comment = ','.join(license_expression_list)
|
|
232
224
|
|
|
233
|
-
if is_exclude_file(file_path, prev_dir, prev_dir_value):
|
|
234
|
-
result_item.exclude = True
|
|
235
225
|
scancode_file_item.append(result_item)
|
|
236
226
|
except Exception as ex:
|
|
237
227
|
msg.append(f"Error Parsing item: {ex}")
|
|
@@ -263,17 +253,9 @@ def parsing_scancode_32_later(
|
|
|
263
253
|
is_binary = file.get("is_binary", False)
|
|
264
254
|
is_dir = file.get("type", "") == "directory"
|
|
265
255
|
if (not file_path) or is_binary or is_dir:
|
|
256
|
+
logger.info(f"Skipping {file_path} because it is binary or directory")
|
|
266
257
|
continue
|
|
267
|
-
|
|
268
258
|
result_item = SourceItem(file_path)
|
|
269
|
-
is_pkg, pkg_path = is_package_dir(os.path.dirname(file_path))
|
|
270
|
-
if is_pkg:
|
|
271
|
-
result_item.source_name_or_path = pkg_path
|
|
272
|
-
if not any(x.source_name_or_path == result_item.source_name_or_path for x in scancode_file_item):
|
|
273
|
-
result_item.exclude = True
|
|
274
|
-
scancode_file_item.append(result_item)
|
|
275
|
-
continue
|
|
276
|
-
|
|
277
259
|
if has_error:
|
|
278
260
|
error_msg = file.get("scan_errors", [])
|
|
279
261
|
if error_msg:
|
|
@@ -326,9 +308,11 @@ def parsing_scancode_32_later(
|
|
|
326
308
|
license_list[lic_matched_key] = lic_info
|
|
327
309
|
license_detected.append(found_lic)
|
|
328
310
|
result_item.licenses = license_detected
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
result_item.is_license_text =
|
|
311
|
+
file_ext = os.path.splitext(file_path)[1].lower()
|
|
312
|
+
is_source_file = file_ext and file_ext in SOURCE_EXTENSIONS
|
|
313
|
+
result_item.is_license_text = is_notice_file(file_path) or (
|
|
314
|
+
file.get("percentage_of_license_text", 0) > 90 and not is_source_file
|
|
315
|
+
)
|
|
332
316
|
|
|
333
317
|
detected_without_pom = []
|
|
334
318
|
if is_manifest_file(file_path) and len(license_detected) > 0:
|
|
@@ -371,7 +355,7 @@ def parsing_scancode_32_later(
|
|
|
371
355
|
license_expression = file.get("detected_license_expression", "")
|
|
372
356
|
if license_expression_spdx:
|
|
373
357
|
license_expression = license_expression_spdx
|
|
374
|
-
if license_expression:
|
|
358
|
+
if license_expression and "OR" in license_expression:
|
|
375
359
|
result_item.comment = license_expression
|
|
376
360
|
|
|
377
361
|
scancode_file_item.append(result_item)
|
|
fosslight_source/_parsing_scanoss_file.py
CHANGED
|
@@ -3,12 +3,9 @@
|
|
|
3
3
|
# Copyright (c) 2020 LG Electronics Inc.
|
|
4
4
|
# SPDX-License-Identifier: Apache-2.0
|
|
5
5
|
|
|
6
|
-
import os
|
|
7
6
|
import logging
|
|
8
7
|
import fosslight_util.constant as constant
|
|
9
8
|
from ._scan_item import SourceItem
|
|
10
|
-
from ._scan_item import is_exclude_file
|
|
11
|
-
from ._scan_item import is_package_dir
|
|
12
9
|
from ._scan_item import replace_word
|
|
13
10
|
from typing import Tuple
|
|
14
11
|
|
|
@@ -18,7 +15,7 @@ SCANOSS_INFO_HEADER = ['No', 'Source Path', 'Component Declared', 'SPDX Tag',
|
|
|
18
15
|
'Matched Rate (line number)', 'scanoss_fileURL']
|
|
19
16
|
|
|
20
17
|
|
|
21
|
-
def parsing_extraInfo(scanned_result: dict) -> list:
|
|
18
|
+
def parsing_extra_info(scanned_result: dict) -> list:
|
|
22
19
|
scanoss_extra_info = []
|
|
23
20
|
for scan_item in scanned_result:
|
|
24
21
|
license_w_source = scan_item.scanoss_reference
|
|
@@ -37,22 +34,14 @@ def parsing_extraInfo(scanned_result: dict) -> list:
|
|
|
37
34
|
return scanoss_extra_info
|
|
38
35
|
|
|
39
36
|
|
|
40
|
-
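def parsing_scanResult(scanoss_report: dict, path_to_scan: str = "", path_to_exclude: list = []) -> Tuple[bool, list]: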
|
|
37
|
+
def parsing_scan_result(scanoss_report: dict, excluded_files: set = None) -> Tuple[bool, list]:
|
|
41
38
|
scanoss_file_item = []
|
|
42
|
-
abs_path_to_exclude = [os.path.abspath(os.path.join(path_to_scan, path)) for path in path_to_exclude]
|
|
43
39
|
|
|
44
40
|
for file_path, findings in scanoss_report.items():
|
|
45
|
-
|
|
46
|
-
if
|
|
41
|
+
file_path_normalized = file_path.replace('\\', '/')
|
|
42
|
+
if file_path_normalized in excluded_files:
|
|
47
43
|
continue
|
|
48
44
|
result_item = SourceItem(file_path)
|
|
49
|
-
is_pkg, pkg_path = is_package_dir(os.path.dirname(file_path))
|
|
50
|
-
if is_pkg:
|
|
51
|
-
result_item.source_name_or_path = pkg_path
|
|
52
|
-
if not any(x.source_name_or_path == result_item.source_name_or_path for x in scanoss_file_item):
|
|
53
|
-
result_item.exclude = True
|
|
54
|
-
scanoss_file_item.append(result_item)
|
|
55
|
-
continue
|
|
56
45
|
|
|
57
46
|
if 'id' in findings[0]:
|
|
58
47
|
if "none" == findings[0]['id']:
|
|
@@ -86,9 +75,6 @@ def parsing_scanResult(scanoss_report: dict, path_to_scan: str = "", path_to_exc
|
|
|
86
75
|
result_item.licenses = license_detected
|
|
87
76
|
result_item.scanoss_reference = license_w_source
|
|
88
77
|
|
|
89
|
-
if is_exclude_file(file_path):
|
|
90
|
-
result_item.exclude = True
|
|
91
|
-
|
|
92
78
|
if 'file_url' in findings[0]:
|
|
93
79
|
result_item.fileURL = findings[0]['file_url']
|
|
94
80
|
if 'matched' in findings[0]:
|
fosslight_source/_scan_item.py
CHANGED
|
@@ -6,6 +6,10 @@
|
|
|
6
6
|
import os
|
|
7
7
|
import logging
|
|
8
8
|
import re
|
|
9
|
+
import json
|
|
10
|
+
import hashlib
|
|
11
|
+
import urllib.request
|
|
12
|
+
import urllib.error
|
|
9
13
|
import fosslight_util.constant as constant
|
|
10
14
|
from fosslight_util.oss_item import FileItem, OssItem, get_checksum_sha1
|
|
11
15
|
|
|
@@ -15,17 +19,10 @@ _notice_filename = ['licen[cs]e[s]?', 'notice[s]?', 'legal', 'copyright[s]?', 'c
|
|
|
15
19
|
'[a,l]?gpl[-]?[1-3]?[.,-,_]?[0-1]?', 'mit', 'bsd[-]?[0-4]?', 'bsd[-]?[0-4][-]?clause[s]?',
|
|
16
20
|
'apache[-,_]?[1-2]?[.,-,_]?[0-2]?']
|
|
17
21
|
_manifest_filename = [r'.*\.pom$', r'package\.json$', r'setup\.py$', r'pubspec\.yaml$', r'.*\.podspec$', r'Cargo\.toml$']
|
|
18
|
-
_exclude_filename = ["changelog", "config.guess", "config.sub", "changes", "ltmain.sh",
|
|
19
|
-
"configure", "configure.ac", "depcomp", "compile", "missing", "makefile"]
|
|
20
|
-
_exclude_extension = [".m4", ".in", ".po"]
|
|
21
|
-
_exclude_directory = ["test", "tests", "doc", "docs"]
|
|
22
|
-
_exclude_directory = [os.path.sep + dir_name +
|
|
23
|
-
os.path.sep for dir_name in _exclude_directory]
|
|
24
|
-
_exclude_directory.append("/.")
|
|
25
|
-
_package_directory = ["node_modules", "venv", "Pods", "Carthage"]
|
|
26
22
|
MAX_LICENSE_LENGTH = 200
|
|
27
23
|
MAX_LICENSE_TOTAL_LENGTH = 600
|
|
28
24
|
SUBSTRING_LICENSE_COMMENT = "Maximum character limit (License)"
|
|
25
|
+
KB_URL = "http://fosslight-kb.lge.com/query"
|
|
29
26
|
|
|
30
27
|
|
|
31
28
|
class SourceItem(FileItem):
|
|
@@ -77,7 +74,90 @@ class SourceItem(FileItem):
|
|
|
77
74
|
else:
|
|
78
75
|
self._licenses = value
|
|
79
76
|
|
|
80
|
-
def set_oss_item(self) -> None:
|
|
77
|
+
def _get_md5_hash(self, path_to_scan: str = "") -> str:
|
|
78
|
+
try:
|
|
79
|
+
file_path = self.source_name_or_path
|
|
80
|
+
if path_to_scan and not os.path.isabs(file_path):
|
|
81
|
+
file_path = os.path.join(path_to_scan, file_path)
|
|
82
|
+
file_path = os.path.normpath(file_path)
|
|
83
|
+
|
|
84
|
+
if os.path.isfile(file_path):
|
|
85
|
+
md5_hash = hashlib.md5()
|
|
86
|
+
with open(file_path, "rb") as f:
|
|
87
|
+
for chunk in iter(lambda: f.read(4096), b""):
|
|
88
|
+
md5_hash.update(chunk)
|
|
89
|
+
return md5_hash.hexdigest()
|
|
90
|
+
except FileNotFoundError:
|
|
91
|
+
logger.warning(f"File not found: {self.source_name_or_path}")
|
|
92
|
+
except PermissionError:
|
|
93
|
+
logger.warning(f"Permission denied: {self.source_name_or_path}")
|
|
94
|
+
except Exception as e:
|
|
95
|
+
logger.warning(f"Failed to compute MD5 for {self.source_name_or_path}: {e}")
|
|
96
|
+
return ""
|
|
97
|
+
|
|
98
|
+
def _get_origin_url_from_md5_hash(self, md5_hash: str) -> str:
|
|
99
|
+
try:
|
|
100
|
+
request = urllib.request.Request(KB_URL, data=json.dumps({"file_hash": md5_hash}).encode('utf-8'), method='POST')
|
|
101
|
+
request.add_header('Accept', 'application/json')
|
|
102
|
+
request.add_header('Content-Type', 'application/json')
|
|
103
|
+
|
|
104
|
+
with urllib.request.urlopen(request, timeout=10) as response:
|
|
105
|
+
data = json.loads(response.read().decode())
|
|
106
|
+
if isinstance(data, dict):
|
|
107
|
+
# Only extract output if return_code is 0 (success)
|
|
108
|
+
return_code = data.get('return_code', -1)
|
|
109
|
+
if return_code == 0:
|
|
110
|
+
output = data.get('output', '')
|
|
111
|
+
if output:
|
|
112
|
+
return output
|
|
113
|
+
except urllib.error.URLError as e:
|
|
114
|
+
logger.warning(f"Failed to fetch origin_url from API for MD5 hash {md5_hash}: {e}")
|
|
115
|
+
except json.JSONDecodeError as e:
|
|
116
|
+
logger.warning(f"Failed to parse API response for MD5 hash {md5_hash}: {e}")
|
|
117
|
+
except Exception as e:
|
|
118
|
+
logger.warning(f"Error getting origin_url for MD5 hash {md5_hash}: {e}")
|
|
119
|
+
return ""
|
|
120
|
+
|
|
121
|
+
def _extract_oss_info_from_url(self, url: str) -> tuple:
|
|
122
|
+
"""
|
|
123
|
+
Extract OSS name, version, and repository URL from GitHub URL.
|
|
124
|
+
|
|
125
|
+
Supported patterns:
|
|
126
|
+
- https://github.com/{owner}/{repo}/archive/{version}.zip
|
|
127
|
+
- https://github.com/{owner}/{repo}/archive/{tag}/{version}.zip
|
|
128
|
+
- https://github.com/{owner}/{repo}/releases/download/{version}/{filename}
|
|
129
|
+
|
|
130
|
+
:param url: GitHub URL to extract information from
|
|
131
|
+
:return: tuple of (repo_name, version, repo_url)
|
|
132
|
+
"""
|
|
133
|
+
try:
|
|
134
|
+
repo_match = re.search(r'github\.com/([^/]+)/([^/]+)/', url)
|
|
135
|
+
if not repo_match:
|
|
136
|
+
return "", "", ""
|
|
137
|
+
|
|
138
|
+
owner = repo_match.group(1)
|
|
139
|
+
repo_name = repo_match.group(2)
|
|
140
|
+
repo_url = f"https://github.com/{owner}/{repo_name}"
|
|
141
|
+
version = ""
|
|
142
|
+
# Extract version from releases pattern first: /releases/download/{version}/
|
|
143
|
+
releases_match = re.search(r'/releases/download/([^/]+)/', url)
|
|
144
|
+
if releases_match:
|
|
145
|
+
version = releases_match.group(1)
|
|
146
|
+
else:
|
|
147
|
+
# Extract version from archive pattern: /archive/{version}.zip or /archive/{tag}/{version}.zip
|
|
148
|
+
# For pattern with tag, take the last segment before .zip
|
|
149
|
+
archive_match = re.search(r'/archive/(.+?)(?:\.zip|\.tar\.gz)?(?:[?#]|$)', url)
|
|
150
|
+
if archive_match:
|
|
151
|
+
version_path = archive_match.group(1)
|
|
152
|
+
version = version_path.split('/')[-1] if '/' in version_path else version_path
|
|
153
|
+
if re.match(r'^[0-9a-f]{7,40}$', version, re.IGNORECASE):
|
|
154
|
+
version = ""
|
|
155
|
+
return repo_name, version, repo_url
|
|
156
|
+
except Exception as e:
|
|
157
|
+
logger.debug(f"Failed to extract OSS info from URL {url}: {e}")
|
|
158
|
+
return "", "", ""
|
|
159
|
+
|
|
160
|
+
def set_oss_item(self, path_to_scan: str = "", run_kb: bool = False) -> None:
|
|
81
161
|
self.oss_items = []
|
|
82
162
|
if self.download_location:
|
|
83
163
|
for url in self.download_location:
|
|
@@ -87,6 +167,20 @@ class SourceItem(FileItem):
|
|
|
87
167
|
self.oss_items.append(item)
|
|
88
168
|
else:
|
|
89
169
|
item = OssItem(self.oss_name, self.oss_version, self.licenses)
|
|
170
|
+
if run_kb and not self.is_license_text:
|
|
171
|
+
md5_hash = self._get_md5_hash(path_to_scan)
|
|
172
|
+
if md5_hash:
|
|
173
|
+
origin_url = self._get_origin_url_from_md5_hash(md5_hash)
|
|
174
|
+
if origin_url:
|
|
175
|
+
extracted_name, extracted_version, repo_url = self._extract_oss_info_from_url(origin_url)
|
|
176
|
+
if extracted_name:
|
|
177
|
+
self.oss_name = extracted_name
|
|
178
|
+
if extracted_version:
|
|
179
|
+
self.oss_version = extracted_version
|
|
180
|
+
download_url = repo_url if repo_url else origin_url
|
|
181
|
+
self.download_location = [download_url]
|
|
182
|
+
item = OssItem(self.oss_name, self.oss_version, self.licenses, download_url)
|
|
183
|
+
|
|
90
184
|
item.copyright = "\n".join(self.copyright)
|
|
91
185
|
item.comment = self.comment
|
|
92
186
|
self.oss_items.append(item)
|
|
@@ -107,39 +201,6 @@ class SourceItem(FileItem):
|
|
|
107
201
|
return self.source_name_or_path == other.source_name_or_path
|
|
108
202
|
|
|
109
203
|
|
|
110
|
-
def is_exclude_dir(dir_path: str) -> bool:
|
|
111
|
-
if dir_path:
|
|
112
|
-
dir_path = dir_path.lower()
|
|
113
|
-
dir_path = dir_path if dir_path.endswith(
|
|
114
|
-
os.path.sep) else dir_path + os.path.sep
|
|
115
|
-
dir_path = dir_path if dir_path.startswith(
|
|
116
|
-
os.path.sep) else os.path.sep + dir_path
|
|
117
|
-
return any(dir_name in dir_path for dir_name in _exclude_directory)
|
|
118
|
-
return False
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
def is_exclude_file(file_path: str, prev_dir: str = None, prev_dir_exclude_value: bool = None) -> bool:
|
|
122
|
-
file_path = file_path.lower()
|
|
123
|
-
filename = os.path.basename(file_path)
|
|
124
|
-
if os.path.splitext(filename)[1] in _exclude_extension:
|
|
125
|
-
return True
|
|
126
|
-
if filename.startswith('.') or filename in _exclude_filename:
|
|
127
|
-
return True
|
|
128
|
-
|
|
129
|
-
dir_path = os.path.dirname(file_path)
|
|
130
|
-
if prev_dir is not None: # running ScanCode
|
|
131
|
-
if dir_path == prev_dir:
|
|
132
|
-
return prev_dir_exclude_value
|
|
133
|
-
else:
|
|
134
|
-
# There will be no execution of this else statement.
|
|
135
|
-
# Because scancode json output results are sorted by path,
|
|
136
|
-
# most of them will match the previous if statement.
|
|
137
|
-
return is_exclude_dir(dir_path)
|
|
138
|
-
else: # running SCANOSS
|
|
139
|
-
return is_exclude_dir(dir_path)
|
|
140
|
-
return False
|
|
141
|
-
|
|
142
|
-
|
|
143
204
|
def is_notice_file(file_path: str) -> bool:
|
|
144
205
|
pattern = r"({})(?<!w)".format("|".join(_notice_filename))
|
|
145
206
|
filename = os.path.basename(file_path)
|
|
@@ -150,34 +211,3 @@ def is_manifest_file(file_path: str) -> bool:
|
|
|
150
211
|
pattern = r"({})$".format("|".join(_manifest_filename))
|
|
151
212
|
filename = os.path.basename(file_path)
|
|
152
213
|
return bool(re.match(pattern, filename, re.IGNORECASE))
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
def is_package_dir(dir_path: str) -> bool:
|
|
156
|
-
# scancode and scanoss use '/' as path separator regardless of OS
|
|
157
|
-
dir_path = dir_path.replace('\\', '/')
|
|
158
|
-
path_parts = dir_path.split('/')
|
|
159
|
-
|
|
160
|
-
for pkg_dir in _package_directory:
|
|
161
|
-
if pkg_dir in path_parts:
|
|
162
|
-
pkg_index = path_parts.index(pkg_dir)
|
|
163
|
-
pkg_path = '/'.join(path_parts[:pkg_index + 1])
|
|
164
|
-
return True, pkg_path
|
|
165
|
-
return False, ""
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
def get_excluded_paths(path_to_scan: str, custom_excluded_paths: list = []) -> list:
|
|
169
|
-
path_to_exclude = custom_excluded_paths.copy()
|
|
170
|
-
abs_path_to_scan = os.path.abspath(path_to_scan)
|
|
171
|
-
|
|
172
|
-
for root, dirs, files in os.walk(path_to_scan):
|
|
173
|
-
for dir_name in dirs:
|
|
174
|
-
dir_path = os.path.join(root, dir_name)
|
|
175
|
-
rel_path = os.path.relpath(dir_path, abs_path_to_scan)
|
|
176
|
-
if dir_name in _package_directory:
|
|
177
|
-
if rel_path not in path_to_exclude:
|
|
178
|
-
path_to_exclude.append(rel_path)
|
|
179
|
-
elif is_exclude_dir(rel_path):
|
|
180
|
-
if rel_path not in path_to_exclude:
|
|
181
|
-
path_to_exclude.append(rel_path)
|
|
182
|
-
|
|
183
|
-
return path_to_exclude
|
fosslight_source/cli.py
CHANGED
|
@@ -8,23 +8,24 @@ import os
|
|
|
8
8
|
import platform
|
|
9
9
|
import warnings
|
|
10
10
|
import logging
|
|
11
|
+
import urllib.request
|
|
12
|
+
import urllib.error
|
|
11
13
|
from datetime import datetime
|
|
12
14
|
import fosslight_util.constant as constant
|
|
13
15
|
from fosslight_util.set_log import init_log
|
|
14
16
|
from fosslight_util.timer_thread import TimerThread
|
|
15
|
-
from fosslight_util.exclude import excluding_files
|
|
16
17
|
from ._help import print_version, print_help_msg_source_scanner
|
|
17
18
|
from ._license_matched import get_license_list_to_print
|
|
18
19
|
from fosslight_util.output_format import check_output_formats_v2, write_output_file
|
|
19
20
|
from fosslight_util.correct import correct_with_yaml
|
|
20
21
|
from .run_scancode import run_scan
|
|
21
|
-
from ._scan_item import get_excluded_paths
|
|
22
|
+
from fosslight_util.exclude import get_excluded_paths
|
|
22
23
|
from .run_scanoss import run_scanoss_py
|
|
23
24
|
from .run_scanoss import get_scanoss_extra_info
|
|
24
25
|
import yaml
|
|
25
26
|
import argparse
|
|
26
27
|
from .run_spdx_extractor import get_spdx_downloads
|
|
27
|
-
from ._scan_item import SourceItem
|
|
28
|
+
from ._scan_item import SourceItem, KB_URL
|
|
28
29
|
from fosslight_util.oss_item import ScannerItem
|
|
29
30
|
from typing import Tuple
|
|
30
31
|
|
|
@@ -35,7 +36,10 @@ SCANOSS_HEADER = {SRC_SHEET_NAME: ['ID', 'Source Path', 'OSS Name',
|
|
|
35
36
|
MERGED_HEADER = {SRC_SHEET_NAME: ['ID', 'Source Path', 'OSS Name',
|
|
36
37
|
'OSS Version', 'License', 'Download Location',
|
|
37
38
|
'Homepage', 'Copyright Text', 'Exclude', 'Comment', 'license_reference']}
|
|
38
|
-
SCANNER_TYPE = ['
|
|
39
|
+
SCANNER_TYPE = ['kb', 'scancode', 'scanoss', 'all']
|
|
40
|
+
EXCLUDE_FILENAME = ["changelog", "config.guess", "config.sub", "changes", "ltmain.sh",
|
|
41
|
+
"configure", "configure.ac", "depcomp", "compile", "missing", "Makefile"]
|
|
42
|
+
EXCLUDE_FILE_EXTENSION = [".m4", ".in", ".po"]
|
|
39
43
|
|
|
40
44
|
logger = logging.getLogger(constant.LOGGER_NAME)
|
|
41
45
|
warnings.filterwarnings("ignore", category=FutureWarning)
|
|
@@ -124,23 +128,6 @@ def main() -> None:
|
|
|
124
128
|
sys.exit(1)
|
|
125
129
|
|
|
126
130
|
|
|
127
|
-
def count_files(path_to_scan: str, path_to_exclude: list) -> Tuple[int, int]:
|
|
128
|
-
total_files = 0
|
|
129
|
-
excluded_files = 0
|
|
130
|
-
abs_path_to_exclude = [os.path.abspath(os.path.join(path_to_scan, path)) for path in path_to_exclude]
|
|
131
|
-
|
|
132
|
-
for root, _, files in os.walk(path_to_scan):
|
|
133
|
-
for file in files:
|
|
134
|
-
file_path = os.path.join(root, file)
|
|
135
|
-
abs_file_path = os.path.abspath(file_path)
|
|
136
|
-
if any(os.path.commonpath([abs_file_path, exclude_path]) == exclude_path
|
|
137
|
-
for exclude_path in abs_path_to_exclude):
|
|
138
|
-
excluded_files += 1
|
|
139
|
-
total_files += 1
|
|
140
|
-
|
|
141
|
-
return total_files, excluded_files
|
|
142
|
-
|
|
143
|
-
|
|
144
131
|
def create_report_file(
|
|
145
132
|
_start_time: str, merged_result: list,
|
|
146
133
|
license_list: list, scanoss_result: list,
|
|
@@ -148,7 +135,7 @@ def create_report_file(
|
|
|
148
135
|
output_path: str = "", output_files: list = [],
|
|
149
136
|
output_extensions: list = [], correct_mode: bool = True,
|
|
150
137
|
correct_filepath: str = "", path_to_scan: str = "", path_to_exclude: list = [],
|
|
151
|
-
formats: list = [],
|
|
138
|
+
formats: list = [], api_limit_exceed: bool = False, files_count: int = 0
|
|
152
139
|
) -> 'ScannerItem':
|
|
153
140
|
"""
|
|
154
141
|
Create report files for given scanned result.
|
|
@@ -207,7 +194,6 @@ def create_report_file(
|
|
|
207
194
|
|
|
208
195
|
scan_item = ScannerItem(PKG_NAME, _start_time)
|
|
209
196
|
scan_item.set_cover_pathinfo(path_to_scan, path_to_exclude)
|
|
210
|
-
files_count, _ = count_files(path_to_scan, path_to_exclude)
|
|
211
197
|
scan_item.set_cover_comment(f"Scanned files: {files_count}")
|
|
212
198
|
|
|
213
199
|
if api_limit_exceed:
|
|
@@ -221,12 +207,6 @@ def create_report_file(
|
|
|
221
207
|
|
|
222
208
|
if merged_result:
|
|
223
209
|
sheet_list = {}
|
|
224
|
-
# Remove results that are in excluding file list
|
|
225
|
-
for i in range(len(merged_result) - 1, -1, -1): # Iterate from last to first
|
|
226
|
-
item_path = merged_result[i].source_name_or_path # Assuming SourceItem has 'file_path' attribute
|
|
227
|
-
if item_path in excluded_file_list:
|
|
228
|
-
del merged_result[i] # Delete matching item
|
|
229
|
-
|
|
230
210
|
scan_item.append_file_items(merged_result, PKG_NAME)
|
|
231
211
|
|
|
232
212
|
if selected_scanner == 'scanoss':
|
|
@@ -269,17 +249,35 @@ def create_report_file(
|
|
|
269
249
|
return scan_item
|
|
270
250
|
|
|
271
251
|
|
|
272
|
-
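def merge_results(scancode_result: list = [], scanoss_result: list = [], spdx_downloads: dict = {}) -> list: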
|
|
252
|
+
def check_kb_server_reachable() -> bool:
|
|
253
|
+
try:
|
|
254
|
+
request = urllib.request.Request(KB_URL, method='HEAD')
|
|
255
|
+
with urllib.request.urlopen(request, timeout=5) as response:
|
|
256
|
+
logger.debug(f"KB server is reachable. Response status: {response.status}")
|
|
257
|
+
return response.status != 404
|
|
258
|
+
except urllib.error.HTTPError as e:
|
|
259
|
+
return e.code != 404
|
|
260
|
+
except urllib.error.URLError:
|
|
261
|
+
return False
|
|
262
|
+
except Exception:
|
|
263
|
+
return False
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def merge_results(
|
|
267
|
+
scancode_result: list = [], scanoss_result: list = [], spdx_downloads: dict = {},
|
|
268
|
+
path_to_scan: str = "", run_kb: bool = False
|
|
269
|
+
) -> list:
|
|
273
270
|
|
|
274
271
|
"""
|
|
275
272
|
Merge scanner results and spdx parsing result.
|
|
276
273
|
:param scancode_result: list of scancode results in SourceItem.
|
|
277
274
|
:param scanoss_result: list of scanoss results in SourceItem.
|
|
278
275
|
:param spdx_downloads: dictionary of spdx parsed results.
|
|
276
|
+
:param path_to_scan: path to the scanned directory for constructing absolute file paths.
|
|
277
|
+
:param run_kb: if True, load kb result.
|
|
279
278
|
:return merged_result: list of merged result in SourceItem.
|
|
280
279
|
"""
|
|
281
280
|
|
|
282
|
-
# If anything that is found at SCANOSS only exist, add it to result.
|
|
283
281
|
scancode_result.extend([item for item in scanoss_result if item not in scancode_result])
|
|
284
282
|
|
|
285
283
|
# If download loc. in SPDX form found, overwrite the scanner result.
|
|
@@ -293,9 +291,15 @@ def merge_results(scancode_result: list = [], scanoss_result: list = [], spdx_do
|
|
|
293
291
|
new_result_item = SourceItem(file_name)
|
|
294
292
|
new_result_item.download_location = download_location
|
|
295
293
|
scancode_result.append(new_result_item)
|
|
294
|
+
if run_kb and not check_kb_server_reachable():
|
|
295
|
+
run_kb = False
|
|
296
|
+
if run_kb:
|
|
297
|
+
logger.info("KB server is reachable. Loading data from OSS KB.")
|
|
298
|
+
else:
|
|
299
|
+
logger.info("Skipping KB lookup.")
|
|
296
300
|
|
|
297
301
|
for item in scancode_result:
|
|
298
|
-
item.set_oss_item()
|
|
302
|
+
item.set_oss_item(path_to_scan, run_kb)
|
|
299
303
|
|
|
300
304
|
return scancode_result
|
|
301
305
|
|
|
@@ -339,30 +343,38 @@ def run_scanners(
|
|
|
339
343
|
|
|
340
344
|
logger, result_log = init_log(os.path.join(output_path, f"fosslight_log_src_{start_time}.txt"),
|
|
341
345
|
True, logging.INFO, logging.DEBUG, PKG_NAME, path_to_scan, path_to_exclude)
|
|
342
|
-
excluded_file_list = excluding_files(path_to_exclude, path_to_scan)
|
|
343
346
|
|
|
344
347
|
if '.xlsx' not in output_extensions and print_matched_text:
|
|
345
348
|
logger.warning("-m option is only available for excel.")
|
|
346
349
|
print_matched_text = False
|
|
347
350
|
|
|
348
351
|
if success:
|
|
349
|
-
|
|
350
|
-
|
|
352
|
+
path_to_exclude_with_filename = path_to_exclude + EXCLUDE_FILENAME
|
|
353
|
+
excluded_path_with_default_exclusion, excluded_path_without_dot, excluded_files, cnt_file_except_skipped = (
|
|
354
|
+
get_excluded_paths(path_to_scan, path_to_exclude_with_filename, EXCLUDE_FILE_EXTENSION))
|
|
355
|
+
logger.debug(f"Skipped paths: {excluded_path_with_default_exclusion}")
|
|
356
|
+
|
|
357
|
+
if not selected_scanner:
|
|
358
|
+
selected_scanner = 'all'
|
|
359
|
+
if selected_scanner in ['scancode', 'all', 'kb']:
|
|
351
360
|
success, result_log[RESULT_KEY], scancode_result, license_list = run_scan(path_to_scan, output_file_name,
|
|
352
361
|
write_json_file, num_cores, True,
|
|
353
362
|
print_matched_text, formats, called_by_cli,
|
|
354
363
|
time_out, correct_mode, correct_filepath,
|
|
355
|
-
excluded_path_with_default_exclusion
|
|
356
|
-
|
|
364
|
+
excluded_path_with_default_exclusion,
|
|
365
|
+
excluded_files)
|
|
366
|
+
excluded_files = set(excluded_files) if excluded_files else set()
|
|
367
|
+
if selected_scanner in ['scanoss', 'all']:
|
|
357
368
|
scanoss_result, api_limit_exceed = run_scanoss_py(path_to_scan, output_file_name, formats, True, write_json_file,
|
|
358
|
-
num_cores, excluded_path_with_default_exclusion)
|
|
369
|
+
num_cores, excluded_path_with_default_exclusion, excluded_files)
|
|
359
370
|
if selected_scanner in SCANNER_TYPE:
|
|
360
|
-
|
|
361
|
-
|
|
371
|
+
run_kb = True if selected_scanner in ['kb', 'all'] else False
|
|
372
|
+
spdx_downloads = get_spdx_downloads(path_to_scan, excluded_files)
|
|
373
|
+
merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads, path_to_scan, run_kb)
|
|
362
374
|
scan_item = create_report_file(start_time, merged_result, license_list, scanoss_result, selected_scanner,
|
|
363
375
|
print_matched_text, output_path, output_files, output_extensions, correct_mode,
|
|
364
|
-
correct_filepath, path_to_scan,
|
|
365
|
-
api_limit_exceed)
|
|
376
|
+
correct_filepath, path_to_scan, excluded_path_without_dot, formats,
|
|
377
|
+
api_limit_exceed, cnt_file_except_skipped)
|
|
366
378
|
else:
|
|
367
379
|
print_help_msg_source_scanner()
|
|
368
380
|
result_log[RESULT_KEY] = "Unsupported scanner"
|
fosslight_source/run_scancode.py
CHANGED
|
@@ -29,7 +29,8 @@ def run_scan(
|
|
|
29
29
|
return_results: bool = False, need_license: bool = False,
|
|
30
30
|
formats: list = [], called_by_cli: bool = False,
|
|
31
31
|
time_out: int = 120, correct_mode: bool = True,
|
|
32
|
-
correct_filepath: str = "", path_to_exclude: list = []
|
|
32
|
+
correct_filepath: str = "", path_to_exclude: list = [],
|
|
33
|
+
excluded_files: list = []
|
|
33
34
|
) -> Tuple[bool, str, list, list]:
|
|
34
35
|
if not called_by_cli:
|
|
35
36
|
global logger
|
|
@@ -74,7 +75,6 @@ def run_scan(
|
|
|
74
75
|
if os.path.isdir(path_to_scan):
|
|
75
76
|
try:
|
|
76
77
|
time_out = float(time_out)
|
|
77
|
-
logger.debug(f"Skipped by Scancode: {path_to_exclude}")
|
|
78
78
|
pretty_params = {}
|
|
79
79
|
pretty_params["path_to_scan"] = path_to_scan
|
|
80
80
|
pretty_params["path_to_exclude"] = path_to_exclude
|
|
@@ -91,39 +91,40 @@ def run_scan(
|
|
|
91
91
|
exclude_path_normalized = os.path.normpath(exclude_path).replace("\\", "/")
|
|
92
92
|
|
|
93
93
|
if exclude_path_normalized.endswith("/**"):
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
is_dir = os.path.isdir(full_exclude_path)
|
|
103
|
-
is_file = os.path.isfile(full_exclude_path)
|
|
104
|
-
if is_dir:
|
|
105
|
-
dir_name = os.path.basename(exclude_path_normalized.rstrip("/"))
|
|
106
|
-
base_path = exclude_path_normalized.rstrip("/")
|
|
107
|
-
|
|
108
|
-
if dir_name:
|
|
109
|
-
total_files_to_excluded.append(dir_name)
|
|
110
|
-
max_depth = 0
|
|
111
|
-
for root, dirs, files in os.walk(full_exclude_path):
|
|
112
|
-
depth = root[len(full_exclude_path):].count(os.sep)
|
|
113
|
-
max_depth = max(max_depth, depth)
|
|
114
|
-
for depth in range(1, max_depth + 2):
|
|
115
|
-
pattern = base_path + "/*" * depth
|
|
116
|
-
total_files_to_excluded.append(pattern)
|
|
94
|
+
base_dir = exclude_path_normalized[:-3].rstrip("/")
|
|
95
|
+
if base_dir:
|
|
96
|
+
full_exclude_path = os.path.join(abs_path_to_scan, base_dir)
|
|
97
|
+
if os.path.isdir(full_exclude_path):
|
|
98
|
+
total_files_to_excluded.append(base_dir)
|
|
99
|
+
total_files_to_excluded.append(exclude_path_normalized)
|
|
100
|
+
else:
|
|
101
|
+
total_files_to_excluded.append(exclude_path_normalized)
|
|
117
102
|
else:
|
|
118
103
|
total_files_to_excluded.append(exclude_path_normalized)
|
|
119
|
-
elif is_file:
|
|
120
|
-
total_files_to_excluded.append(exclude_path_normalized)
|
|
121
104
|
else:
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
105
|
+
has_glob_chars = any(char in exclude_path_normalized for char in ['*', '?', '['])
|
|
106
|
+
if not has_glob_chars:
|
|
107
|
+
full_exclude_path = os.path.join(abs_path_to_scan, exclude_path_normalized)
|
|
108
|
+
is_dir = os.path.isdir(full_exclude_path)
|
|
109
|
+
is_file = os.path.isfile(full_exclude_path)
|
|
110
|
+
else:
|
|
111
|
+
is_dir = False
|
|
112
|
+
is_file = False
|
|
113
|
+
|
|
114
|
+
if is_dir:
|
|
115
|
+
base_path = exclude_path_normalized.rstrip("/")
|
|
116
|
+
if base_path:
|
|
117
|
+
total_files_to_excluded.append(base_path)
|
|
118
|
+
total_files_to_excluded.append(f"{base_path}/**")
|
|
119
|
+
else:
|
|
120
|
+
total_files_to_excluded.append(exclude_path_normalized)
|
|
121
|
+
elif is_file:
|
|
122
|
+
total_files_to_excluded.append(f"**/{exclude_path_normalized}")
|
|
123
|
+
else:
|
|
124
|
+
total_files_to_excluded.append(exclude_path_normalized)
|
|
125
|
+
|
|
126
|
+
if excluded_files:
|
|
127
|
+
total_files_to_excluded.extend(f"**/{file_path}" for file_path in excluded_files)
|
|
127
128
|
|
|
128
129
|
total_files_to_excluded = sorted(list(set(total_files_to_excluded)))
|
|
129
130
|
ignore_tuple = tuple(total_files_to_excluded)
|
fosslight_source/run_scanoss.py
CHANGED
|
@@ -12,8 +12,8 @@ from datetime import datetime
|
|
|
12
12
|
import fosslight_util.constant as constant
|
|
13
13
|
from fosslight_util.set_log import init_log
|
|
14
14
|
from fosslight_util.output_format import check_output_formats_v2 # , write_output_file
|
|
15
|
-
from ._parsing_scanoss_file import
|
|
16
|
-
from ._parsing_scanoss_file import
|
|
15
|
+
from ._parsing_scanoss_file import parsing_scan_result # scanoss
|
|
16
|
+
from ._parsing_scanoss_file import parsing_extra_info # scanoss
|
|
17
17
|
import shutil
|
|
18
18
|
from pathlib import Path
|
|
19
19
|
from scanoss.scanner import Scanner, ScanType
|
|
@@ -28,11 +28,12 @@ SCANOSS_OUTPUT_FILE = "scanoss_raw_result.json"
|
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
def get_scanoss_extra_info(scanned_result: dict) -> list:
    """Return the extra information parsed from a SCANOSS scan result.

    Thin wrapper that hands ``scanned_result`` to ``parsing_extra_info``
    and returns whatever that helper produces.
    """
    extra_info = parsing_extra_info(scanned_result)
    return extra_info
|
|
32
32
|
|
|
33
33
|
|
|
34
|
-
def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list = [],
|
|
35
|
-
|
|
34
|
+
def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list = [],
|
|
35
|
+
called_by_cli: bool = False, write_json_file: bool = False, num_threads: int = -1,
|
|
36
|
+
path_to_exclude: list = [], excluded_files: set = None) -> list:
|
|
36
37
|
"""
|
|
37
38
|
Run scanoss.py for the given path.
|
|
38
39
|
|
|
@@ -72,7 +73,7 @@ def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list =
|
|
|
72
73
|
try:
|
|
73
74
|
scanner = Scanner(
|
|
74
75
|
ignore_cert_errors=True,
|
|
75
|
-
skip_folders=path_to_exclude,
|
|
76
|
+
skip_folders=list(path_to_exclude) if path_to_exclude else [],
|
|
76
77
|
scan_output=output_json_file,
|
|
77
78
|
scan_options=ScanType.SCAN_SNIPPETS.value,
|
|
78
79
|
nb_threads=num_threads if num_threads > 0 else 10
|
|
@@ -86,30 +87,16 @@ def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list =
|
|
|
86
87
|
logger.debug(f"{captured_output}")
|
|
87
88
|
|
|
88
89
|
if os.path.isfile(output_json_file):
|
|
89
|
-
total_files_to_excluded = []
|
|
90
|
-
if path_to_exclude:
|
|
91
|
-
for path in path_to_exclude:
|
|
92
|
-
path = os.path.join(path_to_scan, os.path.relpath(path, os.path.abspath(path_to_scan))) \
|
|
93
|
-
if not os.path.isabs(path_to_scan) and os.path.isabs(path) else os.path.join(path_to_scan, path)
|
|
94
|
-
if os.path.isdir(path):
|
|
95
|
-
for root, _, files in os.walk(path):
|
|
96
|
-
root = root[len(path_to_scan) + 1:]
|
|
97
|
-
total_files_to_excluded.extend([os.path.normpath(os.path.join(root, file)).replace('\\', '/')
|
|
98
|
-
for file in files])
|
|
99
|
-
elif os.path.isfile(path):
|
|
100
|
-
path = path[len(path_to_scan) + 1:]
|
|
101
|
-
total_files_to_excluded.append(os.path.normpath(path).replace('\\', '/'))
|
|
102
|
-
|
|
103
90
|
with open(output_json_file, "r") as st_json:
|
|
104
91
|
st_python = json.load(st_json)
|
|
105
|
-
for key_to_exclude in
|
|
92
|
+
for key_to_exclude in excluded_files:
|
|
106
93
|
if key_to_exclude in st_python:
|
|
107
94
|
del st_python[key_to_exclude]
|
|
108
95
|
with open(output_json_file, 'w') as st_json:
|
|
109
96
|
json.dump(st_python, st_json, indent=4)
|
|
110
97
|
with open(output_json_file, "r") as st_json:
|
|
111
98
|
st_python = json.load(st_json)
|
|
112
|
-
scanoss_file_list =
|
|
99
|
+
scanoss_file_list = parsing_scan_result(st_python, excluded_files)
|
|
113
100
|
|
|
114
101
|
except Exception as error:
|
|
115
102
|
logger.debug(f"SCANOSS Parsing {path_to_scan}: {error}")
|
|
@@ -12,39 +12,26 @@ import mmap
|
|
|
12
12
|
logger = logging.getLogger(constant.LOGGER_NAME)
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
def
|
|
16
|
-
|
|
17
|
-
|
|
15
|
+
def get_spdx_downloads(path_to_scan: str, path_to_exclude: set = None) -> dict:
    """Collect ``SPDX-PackageDownloadLocation`` values found under a directory.

    Every file below ``path_to_scan`` is memory-mapped and searched
    (case-insensitively) for ``SPDX-PackageDownloadLocation: <value>`` tags.

    Args:
        path_to_scan: Directory that is walked recursively.
        path_to_exclude: Relative file paths (relative to ``path_to_scan``,
            using ``/`` as separator) that must be skipped. ``None`` means
            that nothing is excluded.

    Returns:
        A dict mapping each relative file path (``/``-separated) to the list
        of download locations found in that file, in order of appearance.
        Files without a match are not present in the dict.
    """
    # The default is None; normalise it so the membership test below cannot
    # raise TypeError when the caller does not pass an exclusion set.
    excluded = path_to_exclude if path_to_exclude is not None else ()

    download_dict = {}
    find_word = re.compile(rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)", re.IGNORECASE)
    abs_path_to_scan = os.path.abspath(path_to_scan)

    for root, _, files in os.walk(path_to_scan):
        for file in files:
            file_path = os.path.join(root, file)
            # Keys are always forward-slash separated, also on Windows.
            rel_path_file = os.path.relpath(file_path, abs_path_to_scan).replace('\\', '/')
            if rel_path_file in excluded:
                continue
            try:
                # mmap cannot map an empty file, so skip zero-length files.
                if os.path.getsize(file_path) > 0:
                    # Binary mode: only the file descriptor is used and the
                    # pattern is a bytes pattern, so no text decoding is wanted.
                    with open(file_path, "rb") as f:
                        with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj:
                            for word in find_word.findall(mmap_obj):
                                download_dict.setdefault(rel_path_file, []).append(word.decode('utf-8'))
            except Exception as ex:
                # Best effort: an unreadable file must not abort the whole scan.
                logger.warning(f"Failed to extract SPDX download location. {rel_path_file}, {ex}")
    return download_dict
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fosslight_source
|
|
3
|
-
Version: 2.1.19
|
|
3
|
+
Version: 2.2.1
|
|
4
4
|
Summary: FOSSLight Source Scanner
|
|
5
5
|
Home-page: https://github.com/fosslight/fosslight_source_scanner
|
|
6
6
|
Download-URL: https://github.com/fosslight/fosslight_source_scanner
|
|
@@ -17,7 +17,7 @@ License-File: LICENSE
|
|
|
17
17
|
Requires-Dist: pyparsing
|
|
18
18
|
Requires-Dist: scanoss>=1.18.0
|
|
19
19
|
Requires-Dist: XlsxWriter
|
|
20
|
-
Requires-Dist: fosslight_util>=2.1.
|
|
20
|
+
Requires-Dist: fosslight_util>=2.1.34
|
|
21
21
|
Requires-Dist: PyYAML
|
|
22
22
|
Requires-Dist: wheel>=0.38.1
|
|
23
23
|
Requires-Dist: intbitset
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
fosslight_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
fosslight_source/_help.py,sha256=Ge6g9GKWGza11E74PFnBMqsj40UCUz-a_xArDZ1FClU,2316
|
|
3
|
+
fosslight_source/_license_matched.py,sha256=-3H881XQjFDafRttBsuboS3VbCPYEvPH1pwWXptknE4,2164
|
|
4
|
+
fosslight_source/_parsing_scancode_file_item.py,sha256=-shPakF0oQWDzxWFylE2dQ93O4tgCudYM2zvX4K5glQ,19386
|
|
5
|
+
fosslight_source/_parsing_scanoss_file.py,sha256=L3iHqmQF2jeSpHYuYSre44doXKy-BoX0u1Lm2IfJSU8,3866
|
|
6
|
+
fosslight_source/_scan_item.py,sha256=NMmYaqdpNM-yeJxXPVPmoPo_thOnaAGRXYDEcpD6s2Y,9415
|
|
7
|
+
fosslight_source/cli.py,sha256=qbp87Rhe5c2hIcF1-5TR6btPeOCe32Ffq1pxJM9ADcY,17303
|
|
8
|
+
fosslight_source/run_scancode.py,sha256=TFyNLV6P9rSBo9royDoG6az4l7Tkpl8Gr66IFK1DBU8,9021
|
|
9
|
+
fosslight_source/run_scanoss.py,sha256=_gdA4kOByI4saT4bDvMwIabpxtpH4f_yruHdBtb_g-o,4852
|
|
10
|
+
fosslight_source/run_spdx_extractor.py,sha256=LLyYKpkpD5Qxkv_qTNBrFtKvrmnXw9SFYZCkpishb_c,1537
|
|
11
|
+
fosslight_source-2.2.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
12
|
+
fosslight_source-2.2.1.dist-info/METADATA,sha256=dXxCjJqwvuDy1Yz4KTltcIeO6etLzTx0AbwF_x0aGkU,3557
|
|
13
|
+
fosslight_source-2.2.1.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
|
|
14
|
+
fosslight_source-2.2.1.dist-info/entry_points.txt,sha256=G4bBRWqSrJ68g-2M-JtNDrSZsdym_M7_KohQ2qR1vG8,113
|
|
15
|
+
fosslight_source-2.2.1.dist-info/top_level.txt,sha256=C2vw-0OIent84Vq-UEk1gt_kK1EL8dIItzBzp3WNyA4,17
|
|
16
|
+
fosslight_source-2.2.1.dist-info/RECORD,,
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
fosslight_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
fosslight_source/_help.py,sha256=QuoQvxBPV00IfuD2ft88uRQXMSdrL2rJB7CQr05w3Ng,2312
|
|
3
|
-
fosslight_source/_license_matched.py,sha256=-3H881XQjFDafRttBsuboS3VbCPYEvPH1pwWXptknE4,2164
|
|
4
|
-
fosslight_source/_parsing_scancode_file_item.py,sha256=VZf_-5f7DZi8Zkj9Bx9LETTxcW-9f0KyNQD_DVOUNes,20024
|
|
5
|
-
fosslight_source/_parsing_scanoss_file.py,sha256=0f5JzjnFU-kcPZRX7OKnextyvANjKwwNZeyCJVC7eME,4624
|
|
6
|
-
fosslight_source/_scan_item.py,sha256=5HWJ8j58snEjTqzYtKRB8RVfywVrzivkJQ6WMh7nBwA,7299
|
|
7
|
-
fosslight_source/cli.py,sha256=ApdTDaLEuH1LskLtcMrLyeRDIgIJUZlOp3RrWaju2Pc,16854
|
|
8
|
-
fosslight_source/run_scancode.py,sha256=h8HWoZr5R17kXCYjiR56ZTdpDwpFKPAurpUpjTvT424,9006
|
|
9
|
-
fosslight_source/run_scanoss.py,sha256=8wu3sa-YBqjfb5x2dbDJuAdw3rrExueOW23WdzqDCaU,5721
|
|
10
|
-
fosslight_source/run_spdx_extractor.py,sha256=Hr9sTv06cJaVITy8amwexIW2FV8_rUcFw6hKmR9ZYws,1990
|
|
11
|
-
fosslight_source-2.1.19.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
12
|
-
fosslight_source-2.1.19.dist-info/METADATA,sha256=E_y220fkjF5KUrZUglR9MscDfD9AnBX5h63kO1FkAOg,3558
|
|
13
|
-
fosslight_source-2.1.19.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
|
|
14
|
-
fosslight_source-2.1.19.dist-info/entry_points.txt,sha256=G4bBRWqSrJ68g-2M-JtNDrSZsdym_M7_KohQ2qR1vG8,113
|
|
15
|
-
fosslight_source-2.1.19.dist-info/top_level.txt,sha256=C2vw-0OIent84Vq-UEk1gt_kK1EL8dIItzBzp3WNyA4,17
|
|
16
|
-
fosslight_source-2.1.19.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|