fosslight-util 2.0.0__py3-none-any.whl → 2.1.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fosslight_util/_get_downloadable_url.py +460 -68
- fosslight_util/compare_yaml.py +3 -1
- fosslight_util/constant.py +5 -1
- fosslight_util/correct.py +4 -6
- fosslight_util/download.py +286 -80
- fosslight_util/exclude.py +65 -0
- fosslight_util/help.py +14 -3
- fosslight_util/oss_item.py +24 -3
- fosslight_util/output_format.py +100 -18
- fosslight_util/set_log.py +8 -2
- fosslight_util/write_cyclonedx.py +210 -0
- fosslight_util/write_excel.py +5 -1
- fosslight_util/write_scancodejson.py +31 -14
- fosslight_util/write_spdx.py +161 -109
- {fosslight_util-2.0.0.dist-info → fosslight_util-2.1.29.dist-info}/METADATA +24 -22
- fosslight_util-2.1.29.dist-info/RECORD +32 -0
- {fosslight_util-2.0.0.dist-info → fosslight_util-2.1.29.dist-info}/WHEEL +1 -1
- {fosslight_util-2.0.0.dist-info → fosslight_util-2.1.29.dist-info}/entry_points.txt +0 -1
- fosslight_util/convert_excel_to_yaml.py +0 -69
- fosslight_util-2.0.0.dist-info/RECORD +0 -31
- {fosslight_util-2.0.0.dist-info → fosslight_util-2.1.29.dist-info/licenses}/LICENSE +0 -0
- {fosslight_util-2.0.0.dist-info → fosslight_util-2.1.29.dist-info}/top_level.txt +0 -0
fosslight_util/correct.py
CHANGED
|
@@ -61,17 +61,15 @@ def correct_with_yaml(correct_filepath, path_to_scan, scan_item):
|
|
|
61
61
|
|
|
62
62
|
yaml_path_exists = True
|
|
63
63
|
exclude_fileitems.append(idx)
|
|
64
|
-
|
|
65
|
-
if not yaml_path_exists:
|
|
64
|
+
if scanner_name == FOSSLIGHT_SOURCE and not yaml_path_exists:
|
|
66
65
|
correct_item = copy.deepcopy(yaml_file_item)
|
|
67
66
|
if os.path.exists(os.path.normpath(yaml_file_item.source_name_or_path)):
|
|
68
67
|
correct_item.comment = 'Loaded from sbom-info.yaml'
|
|
69
68
|
correct_fileitems.append(correct_item)
|
|
70
69
|
else:
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
correct_fileitems.append(correct_item)
|
|
70
|
+
correct_item.exclude = True
|
|
71
|
+
correct_item.comment = 'Added by sbom-info.yaml'
|
|
72
|
+
correct_fileitems.append(correct_item)
|
|
75
73
|
if correct_fileitems:
|
|
76
74
|
scan_item.append_file_items(correct_fileitems, scanner_name)
|
|
77
75
|
find_match = True
|
fosslight_util/download.py
CHANGED
|
@@ -4,13 +4,13 @@
|
|
|
4
4
|
# SPDX-License-Identifier: Apache-2.0
|
|
5
5
|
import os
|
|
6
6
|
import sys
|
|
7
|
-
import
|
|
7
|
+
import requests
|
|
8
8
|
import tarfile
|
|
9
9
|
import zipfile
|
|
10
10
|
import logging
|
|
11
11
|
import argparse
|
|
12
12
|
import shutil
|
|
13
|
-
import
|
|
13
|
+
from git import Repo, GitCommandError, Git
|
|
14
14
|
import bz2
|
|
15
15
|
import contextlib
|
|
16
16
|
from datetime import datetime
|
|
@@ -26,9 +26,11 @@ import platform
|
|
|
26
26
|
import subprocess
|
|
27
27
|
import re
|
|
28
28
|
from typing import Tuple
|
|
29
|
+
import urllib.parse
|
|
30
|
+
import json
|
|
29
31
|
|
|
30
32
|
logger = logging.getLogger(constant.LOGGER_NAME)
|
|
31
|
-
compression_extension = {".tar.bz2", ".tar.gz", ".tar.xz", ".tgz", ".tar", ".zip", ".jar", ".bz2"}
|
|
33
|
+
compression_extension = {".tar.bz2", ".tar.gz", ".tar.xz", ".tgz", ".tar", ".zip", ".jar", ".bz2", ".whl"}
|
|
32
34
|
prefix_refs = ["refs/remotes/origin/", "refs/tags/"]
|
|
33
35
|
SIGNAL_TIMEOUT = 600
|
|
34
36
|
|
|
@@ -56,6 +58,22 @@ def alarm_handler(signum, frame):
|
|
|
56
58
|
raise TimeOutException(f'Timeout ({SIGNAL_TIMEOUT} sec)', 1)
|
|
57
59
|
|
|
58
60
|
|
|
61
|
+
def is_downloadable(url):
    """Return True when a HEAD request suggests that *url* serves a file.

    The link is treated as NOT downloadable when the response declares an
    HTML content type, or when the HEAD request itself fails. Every other
    response (including one without a Content-Type header) is accepted.

    :param url: link to probe.
    :return: True if the link looks like a downloadable file, else False.
    """
    try:
        # A timeout keeps an unresponsive server from blocking the caller forever.
        response = requests.head(url, allow_redirects=True, timeout=30)
        # Servers may omit Content-Type; treat a missing header as "not HTML"
        # instead of failing on None.lower().
        content_type = response.headers.get('content-type') or ''
        # An HTML answer is a web page, not a file; anything else is accepted
        # (a Content-Disposition "attachment" header never changed the result).
        return 'text/html' not in content_type.lower()
    except Exception as e:
        logger.warning(f"is_downloadable - failed: {e}")
        return False
|
|
75
|
+
|
|
76
|
+
|
|
59
77
|
def change_src_link_to_https(src_link):
|
|
60
78
|
src_link = src_link.replace("git://", "https://")
|
|
61
79
|
if src_link.endswith(".git"):
|
|
@@ -77,7 +95,7 @@ def parse_src_link(src_link):
|
|
|
77
95
|
if src_link.startswith("git://github.com/"):
|
|
78
96
|
src_link_changed = change_src_link_to_https(src_link_split[0])
|
|
79
97
|
elif src_link.startswith("git@github.com:"):
|
|
80
|
-
src_link_changed =
|
|
98
|
+
src_link_changed = src_link_split[0]
|
|
81
99
|
else:
|
|
82
100
|
if "rubygems.org" in src_link:
|
|
83
101
|
src_info["rubygems"] = True
|
|
@@ -92,39 +110,11 @@ def parse_src_link(src_link):
|
|
|
92
110
|
return src_info
|
|
93
111
|
|
|
94
112
|
|
|
95
|
-
def main():
|
|
96
|
-
parser = argparse.ArgumentParser(description='FOSSLight Downloader', prog='fosslight_download', add_help=False)
|
|
97
|
-
parser.add_argument('-h', '--help', help='Print help message', action='store_true', dest='help')
|
|
98
|
-
parser.add_argument('-s', '--source', help='Source link to download', type=str, dest='source')
|
|
99
|
-
parser.add_argument('-t', '--target_dir', help='Target directory', type=str, dest='target_dir', default="")
|
|
100
|
-
parser.add_argument('-d', '--log_dir', help='Directory to save log file', type=str, dest='log_dir', default="")
|
|
101
|
-
|
|
102
|
-
src_link = ""
|
|
103
|
-
target_dir = os.getcwd()
|
|
104
|
-
log_dir = os.getcwd()
|
|
105
|
-
|
|
106
|
-
try:
|
|
107
|
-
args = parser.parse_args()
|
|
108
|
-
except SystemExit:
|
|
109
|
-
sys.exit(0)
|
|
110
|
-
|
|
111
|
-
if args.help:
|
|
112
|
-
print_help_msg_download()
|
|
113
|
-
if args.source:
|
|
114
|
-
src_link = args.source
|
|
115
|
-
if args.target_dir:
|
|
116
|
-
target_dir = args.target_dir
|
|
117
|
-
if args.log_dir:
|
|
118
|
-
log_dir = args.log_dir
|
|
119
|
-
|
|
120
|
-
if not src_link:
|
|
121
|
-
print_help_msg_download()
|
|
122
|
-
else:
|
|
123
|
-
cli_download_and_extract(src_link, target_dir, log_dir)
|
|
124
|
-
|
|
125
|
-
|
|
126
113
|
def cli_download_and_extract(link: str, target_dir: str, log_dir: str, checkout_to: str = "",
|
|
127
|
-
compressed_only: bool = False
|
|
114
|
+
compressed_only: bool = False, ssh_key: str = "",
|
|
115
|
+
id: str = "", git_token: str = "",
|
|
116
|
+
called_cli: bool = True,
|
|
117
|
+
output: bool = False) -> Tuple[bool, str, str, str]:
|
|
128
118
|
global logger
|
|
129
119
|
|
|
130
120
|
success = True
|
|
@@ -136,6 +126,7 @@ def cli_download_and_extract(link: str, target_dir: str, log_dir: str, checkout_
|
|
|
136
126
|
datetime.now().strftime('%Y%m%d_%H-%M-%S')+".txt"
|
|
137
127
|
logger, log_item = init_log(os.path.join(log_dir, log_file_name))
|
|
138
128
|
link = link.strip()
|
|
129
|
+
is_rubygems = False
|
|
139
130
|
|
|
140
131
|
try:
|
|
141
132
|
if not link:
|
|
@@ -144,6 +135,9 @@ def cli_download_and_extract(link: str, target_dir: str, log_dir: str, checkout_
|
|
|
144
135
|
elif os.path.isfile(target_dir):
|
|
145
136
|
success = False
|
|
146
137
|
msg = f"The target directory exists as a file.: {target_dir}"
|
|
138
|
+
elif os.path.exists(link) or os.path.isdir(link) or os.path.isfile(link):
|
|
139
|
+
success = False
|
|
140
|
+
msg = f"You cannot enter a path instead of a link.: {link}"
|
|
147
141
|
else:
|
|
148
142
|
src_info = parse_src_link(link)
|
|
149
143
|
link = src_info.get("url", "")
|
|
@@ -152,12 +146,18 @@ def cli_download_and_extract(link: str, target_dir: str, log_dir: str, checkout_
|
|
|
152
146
|
is_rubygems = src_info.get("rubygems", False)
|
|
153
147
|
|
|
154
148
|
# General download (git clone, wget)
|
|
155
|
-
success_git, msg, oss_name, oss_version = download_git_clone(link, target_dir,
|
|
149
|
+
success_git, msg, oss_name, oss_version = download_git_clone(link, target_dir,
|
|
150
|
+
checkout_to,
|
|
151
|
+
tag, branch,
|
|
152
|
+
ssh_key, id, git_token,
|
|
153
|
+
called_cli)
|
|
154
|
+
link = change_ssh_link_to_https(link)
|
|
156
155
|
if (not is_rubygems) and (not success_git):
|
|
157
156
|
if os.path.isfile(target_dir):
|
|
158
157
|
shutil.rmtree(target_dir)
|
|
159
158
|
|
|
160
|
-
success, downloaded_file, msg_wget, oss_name, oss_version = download_wget(link, target_dir,
|
|
159
|
+
success, downloaded_file, msg_wget, oss_name, oss_version = download_wget(link, target_dir,
|
|
160
|
+
compressed_only, checkout_to)
|
|
161
161
|
if success:
|
|
162
162
|
success = extract_compressed_file(downloaded_file, target_dir, True, compressed_only)
|
|
163
163
|
# Download from rubygems.org
|
|
@@ -177,6 +177,17 @@ def cli_download_and_extract(link: str, target_dir: str, log_dir: str, checkout_
|
|
|
177
177
|
success = False
|
|
178
178
|
msg = str(error)
|
|
179
179
|
|
|
180
|
+
if output:
|
|
181
|
+
output_result = {
|
|
182
|
+
"success": success,
|
|
183
|
+
"message": msg,
|
|
184
|
+
"oss_name": oss_name,
|
|
185
|
+
"oss_version": oss_version
|
|
186
|
+
}
|
|
187
|
+
output_json = os.path.join(log_dir, "fosslight_download_output.json")
|
|
188
|
+
with open(output_json, 'w') as f:
|
|
189
|
+
json.dump(output_result, f, indent=4)
|
|
190
|
+
|
|
180
191
|
logger.info(f"\n* FOSSLight Downloader - Result: {success} ({msg})")
|
|
181
192
|
return success, msg, oss_name, oss_version
|
|
182
193
|
|
|
@@ -200,15 +211,60 @@ def get_ref_to_checkout(checkout_to, ref_list):
|
|
|
200
211
|
return ref_to_checkout
|
|
201
212
|
|
|
202
213
|
|
|
203
|
-
def
|
|
204
|
-
if
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
214
|
+
def get_remote_refs(git_url: str):
    """List the tag and branch names of a remote git repository.

    Runs ``git ls-remote --tags --heads <git_url>`` and splits the reported
    refs into tags (``refs/tags/...``) and branches (``refs/heads/...``).
    Any failure (git missing, timeout, non-zero exit) yields empty lists.

    :param git_url: repository URL to query.
    :return: dict with the keys "tags" and "branches", each a list of names.
    """
    tags = []
    branches = []
    # An empty URL has nothing to query; a value starting with "-" would be
    # parsed by git as a command-line option, so it is refused outright.
    if not git_url or git_url.startswith("-"):
        return {"tags": tags, "branches": branches}

    tag_prefix = 'refs/tags/'
    head_prefix = 'refs/heads/'
    try:
        cp = subprocess.run(["git", "ls-remote", "--tags", "--heads", git_url],
                            capture_output=True, text=True, timeout=30)
        if cp.returncode == 0:
            for line in cp.stdout.splitlines():
                parts = line.split('\t')
                if len(parts) != 2:
                    continue
                ref = parts[1]
                # "refs/tags/<name>^{}" is the peeled commit of an annotated
                # tag, not a separate tag name.
                if ref.endswith('^{}'):
                    continue
                if ref.startswith(tag_prefix):
                    tags.append(ref[len(tag_prefix):])
                elif ref.startswith(head_prefix):
                    branches.append(ref[len(head_prefix):])
    except Exception as e:
        logger.debug(f"get_remote_refs - failed: {e}")
    return {"tags": tags, "branches": branches}
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def _select_matching_ref(base, names, ver_re):
    """Pick the name in *names* that best matches *base*, or None if none does."""
    # 1) exact match
    if base in names:
        return base
    # 2) "v"-prefixed variant (v1.0, v.1.0, "v 1.0"), case-insensitive
    variants = [name for name in names if ver_re.match(name)]
    if variants:
        # The name itself is the last key so that ties never depend on set order.
        return min(variants, key=lambda x: (len(x), x.lower(), x))
    # 3) any name ending with the requested value; shortest wins
    suffixed = [name for name in names if name.endswith(base)]
    if suffixed:
        return min(suffixed, key=lambda x: (len(x), x))
    return None


def decide_checkout(checkout_to="", tag="", branch="", git_url=""):
    """Resolve the requested checkout value to a ref that exists on the remote.

    The first non-empty value of *checkout_to*, *tag*, *branch* is the
    requested name. It is compared with the remote's tags first and its
    branches second, each time in the order: exact match, "v"-prefixed
    variant, name ending with the requested value.

    :param checkout_to: requested branch, tag or version.
    :param tag: requested tag (used when *checkout_to* is empty).
    :param branch: requested branch (used when both others are empty).
    :param git_url: repository whose refs are inspected.
    :return: the matching remote ref name; the requested name unchanged when
             nothing matches; "" when nothing was requested.
    """
    base = checkout_to or tag or branch
    if not base:
        return ""

    ref_dict = get_remote_refs(git_url)
    ver_re = re.compile(r'^(?:v\.? ?)?' + re.escape(base) + r'$', re.IGNORECASE)

    # Tags take precedence over branches.
    for kind in ("tags", "branches"):
        found = _select_matching_ref(base, set(ref_dict.get(kind, [])), ver_re)
        if found is not None:
            return found

    return base
|
|
212
268
|
|
|
213
269
|
|
|
214
270
|
def get_github_ossname(link):
|
|
@@ -229,15 +285,48 @@ def get_github_token(git_url):
|
|
|
229
285
|
return github_token
|
|
230
286
|
|
|
231
287
|
|
|
232
|
-
def
|
|
233
|
-
|
|
234
|
-
msg = ""
|
|
235
|
-
oss_name = get_github_ossname(git_url)
|
|
288
|
+
def download_git_repository(refs_to_checkout, git_url, target_dir, tag, called_cli=True):
    """Clone *git_url* into *target_dir*, preferring the requested ref.

    When *refs_to_checkout* is given, a clone of that branch/tag is tried
    first. If that attempt fails or leaves the directory empty, a plain clone
    without a ref is performed; errors of this second clone propagate to the
    caller.

    :param refs_to_checkout: branch or tag name to clone, or "" for none.
    :param git_url: repository URL.
    :param target_dir: directory to clone into.
    :param tag: not used by this function.
    :param called_cli: when False, GIT_TERMINAL_PROMPT=0 is set for the clone.
    :return: tuple (success, oss_version); oss_version is the ref that was
             cloned, or "" when the plain clone was used.
    """
    cloned = False
    checked_out_ref = ""

    logger.info(f"Download git url :{git_url}")
    clone_env = os.environ.copy()
    if not called_cli:
        clone_env["GIT_TERMINAL_PROMPT"] = "0"

    if refs_to_checkout:
        try:
            # gitPython uses the branch argument the same whether you check out to a branch or a tag.
            Repo.clone_from(git_url, target_dir, branch=refs_to_checkout, env=clone_env)
            if not any(Path(target_dir).iterdir()):
                logger.info(f"No files found in {target_dir} after clone.")
                cloned = False
            else:
                cloned = True
                checked_out_ref = refs_to_checkout
                logger.info(f"Files found in {target_dir} after clone.")
        except GitCommandError as error:
            logger.info(f"Git checkout error:{error}")
            cloned = False
        except Exception as e:
            logger.info(f"Repo.clone_from error:{e}")
            cloned = False

    if cloned:
        return cloned, checked_out_ref

    # Fallback: clone without a specific ref.
    Repo.clone_from(git_url, target_dir, env=clone_env)
    if any(Path(target_dir).iterdir()):
        return True, checked_out_ref

    logger.info(f"No files found in {target_dir} after clone.")
    return False, checked_out_ref
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def download_git_clone(git_url, target_dir, checkout_to="", tag="", branch="",
|
|
325
|
+
ssh_key="", id="", git_token="", called_cli=True):
|
|
326
|
+
oss_name = get_github_ossname(git_url)
|
|
327
|
+
refs_to_checkout = decide_checkout(checkout_to, tag, branch, git_url)
|
|
328
|
+
msg = ""
|
|
329
|
+
success = True
|
|
241
330
|
|
|
242
331
|
try:
|
|
243
332
|
if platform.system() != "Windows":
|
|
@@ -248,34 +337,45 @@ def download_git_clone(git_url, target_dir, checkout_to="", tag="", branch=""):
|
|
|
248
337
|
alarm.start()
|
|
249
338
|
|
|
250
339
|
Path(target_dir).mkdir(parents=True, exist_ok=True)
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
signal.alarm(0)
|
|
340
|
+
|
|
341
|
+
if git_url.startswith("ssh:") and not ssh_key:
|
|
342
|
+
msg = "Private git needs ssh_key"
|
|
343
|
+
success = False
|
|
256
344
|
else:
|
|
257
|
-
|
|
345
|
+
if ssh_key:
|
|
346
|
+
logger.info(f"Download git with ssh_key:{git_url}")
|
|
347
|
+
git_ssh_cmd = f'ssh -i {ssh_key}'
|
|
348
|
+
with Git().custom_environment(GIT_SSH_COMMAND=git_ssh_cmd):
|
|
349
|
+
success, oss_version = download_git_repository(refs_to_checkout, git_url, target_dir, tag, called_cli)
|
|
350
|
+
else:
|
|
351
|
+
if id and git_token:
|
|
352
|
+
try:
|
|
353
|
+
m = re.match(r"^(ht|f)tp(s?)\:\/\/", git_url)
|
|
354
|
+
protocol = m.group()
|
|
355
|
+
if protocol:
|
|
356
|
+
encoded_git_token = urllib.parse.quote(git_token, safe='')
|
|
357
|
+
encoded_id = urllib.parse.quote(id, safe='')
|
|
358
|
+
git_url = git_url.replace(protocol, f"{protocol}{encoded_id}:{encoded_git_token}@")
|
|
359
|
+
except Exception as error:
|
|
360
|
+
logger.info(f"Failed to insert id, token to git url:{error}")
|
|
361
|
+
success, oss_version = download_git_repository(refs_to_checkout, git_url, target_dir, tag, called_cli)
|
|
362
|
+
|
|
363
|
+
logger.info(f"git checkout: {oss_version}")
|
|
364
|
+
refs_to_checkout = oss_version
|
|
365
|
+
|
|
366
|
+
if platform.system() != "Windows":
|
|
367
|
+
signal.alarm(0)
|
|
368
|
+
else:
|
|
369
|
+
del alarm
|
|
258
370
|
except Exception as error:
|
|
371
|
+
success = False
|
|
259
372
|
logger.warning(f"git clone - failed: {error}")
|
|
260
373
|
msg = str(error)
|
|
261
|
-
return False, msg, oss_name, oss_version
|
|
262
|
-
try:
|
|
263
|
-
if ref_to_checkout != "":
|
|
264
|
-
ref_list = [x for x in repo.references]
|
|
265
|
-
ref_to_checkout = get_ref_to_checkout(ref_to_checkout, ref_list)
|
|
266
|
-
logger.info(f"git checkout: {ref_to_checkout}")
|
|
267
|
-
repo.checkout(ref_to_checkout)
|
|
268
374
|
|
|
269
|
-
|
|
270
|
-
if ref_to_checkout.startswith(prefix_ref):
|
|
271
|
-
oss_version = ref_to_checkout[len(prefix_ref):]
|
|
272
|
-
|
|
273
|
-
except Exception as error:
|
|
274
|
-
logger.warning(f"git checkout to {ref_to_checkout} - failed: {error}")
|
|
275
|
-
return True, msg, oss_name, oss_version
|
|
375
|
+
return success, msg, oss_name, refs_to_checkout
|
|
276
376
|
|
|
277
377
|
|
|
278
|
-
def download_wget(link, target_dir, compressed_only):
|
|
378
|
+
def download_wget(link, target_dir, compressed_only, checkout_to):
|
|
279
379
|
success = False
|
|
280
380
|
msg = ""
|
|
281
381
|
oss_name = ""
|
|
@@ -292,23 +392,35 @@ def download_wget(link, target_dir, compressed_only):
|
|
|
292
392
|
|
|
293
393
|
Path(target_dir).mkdir(parents=True, exist_ok=True)
|
|
294
394
|
|
|
295
|
-
ret, new_link, oss_name, oss_version = get_downloadable_url(link)
|
|
395
|
+
ret, new_link, oss_name, oss_version, pkg_type = get_downloadable_url(link, checkout_to)
|
|
296
396
|
if ret and new_link:
|
|
297
397
|
link = new_link
|
|
298
398
|
|
|
299
399
|
if compressed_only:
|
|
400
|
+
# Check if link ends with known compression extensions
|
|
300
401
|
for ext in compression_extension:
|
|
301
402
|
if link.endswith(ext):
|
|
302
403
|
success = True
|
|
303
404
|
break
|
|
304
405
|
else:
|
|
305
|
-
|
|
406
|
+
# If get_downloadable_url found a downloadable file, proceed
|
|
407
|
+
if ret:
|
|
408
|
+
success = True
|
|
409
|
+
else:
|
|
410
|
+
# No downloadable file found in package repositories, verify link is downloadable
|
|
411
|
+
if not is_downloadable(link):
|
|
412
|
+
raise Exception('Not a downloadable link (link:{0})'.format(link))
|
|
413
|
+
success = True
|
|
306
414
|
|
|
415
|
+
# Fallback: verify link is downloadable for compressed_only case
|
|
307
416
|
if not success:
|
|
308
|
-
|
|
417
|
+
if is_downloadable(link):
|
|
418
|
+
success = True
|
|
419
|
+
else:
|
|
420
|
+
raise Exception('Not a downloadable link (link:{0})'.format(link))
|
|
309
421
|
|
|
310
422
|
logger.info(f"wget: {link}")
|
|
311
|
-
downloaded_file =
|
|
423
|
+
downloaded_file = download_file(link, target_dir)
|
|
312
424
|
if platform.system() != "Windows":
|
|
313
425
|
signal.alarm(0)
|
|
314
426
|
else:
|
|
@@ -325,6 +437,49 @@ def download_wget(link, target_dir, compressed_only):
|
|
|
325
437
|
return success, downloaded_file, msg, oss_name, oss_version
|
|
326
438
|
|
|
327
439
|
|
|
440
|
+
def _safe_file_name(name):
    """Reduce *name* to a bare file name; return "" when nothing usable is left."""
    # Drop any directory part (both separator styles) so that the name cannot
    # point outside the download directory.
    name = os.path.basename(name.replace("\\", "/"))
    return "" if name in (".", "..") else name


def download_file(url, target_dir):
    """Download *url* and store it under *target_dir*.

    The file name is taken from the Content-Disposition header (of the GET
    response, else of a preliminary HEAD response), then from the final URL
    path, and finally defaults to "downloaded_file". When *target_dir* is not
    an existing directory it is used as the output file path itself.

    :param url: link to download.
    :param target_dir: directory (or file path) to write to.
    :return: path of the written file, or None when the download failed.
    """
    request_timeout = 30  # seconds; avoids hanging on an unresponsive server
    local_path = ""
    try:
        # The HEAD request is best effort: it resolves redirects up front and
        # may provide a Content-Disposition header.
        try:
            h = requests.head(url, allow_redirects=True, timeout=request_timeout)
            final_url = h.url or url
            headers = h.headers
        except Exception:
            final_url = url
            headers = {}

        with requests.get(final_url, stream=True, allow_redirects=True, timeout=request_timeout) as r:
            r.raise_for_status()

            filename = ""
            cd = r.headers.get("Content-Disposition") or headers.get("Content-Disposition")
            if cd:
                # Prefer the extended "filename*=" form, which is percent-encoded.
                m_star = re.search(r"filename\*=(?:UTF-8'')?([^;\r\n]+)", cd)
                if m_star:
                    filename = urllib.parse.unquote(m_star.group(1).strip().strip('"\''))
                else:
                    m = re.search(r"filename=([^;\r\n]+)", cd)
                    if m:
                        filename = m.group(1).strip().strip('"\'')
                # The header is server-controlled: never let it carry a path.
                filename = _safe_file_name(filename)
            if not filename:
                final_for_name = r.url or final_url
                filename = _safe_file_name(urllib.parse.urlparse(final_for_name).path)
            if not filename:
                filename = "downloaded_file"

            if os.path.isdir(target_dir):
                local_path = os.path.join(target_dir, filename)
            else:
                local_path = target_dir

            with open(local_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
    except Exception as e:
        logger.warning(f"download_file - failed: {e}")
        return None
    return local_path
|
|
481
|
+
|
|
482
|
+
|
|
328
483
|
def extract_compressed_dir(src_dir, target_dir, remove_after_extract=True):
|
|
329
484
|
logger.debug(f"Extract Dir: {src_dir}")
|
|
330
485
|
try:
|
|
@@ -357,6 +512,11 @@ def extract_compressed_file(fname, extract_path, remove_after_extract=True, comp
|
|
|
357
512
|
unzip(fname, extract_path)
|
|
358
513
|
elif fname.endswith(".bz2"):
|
|
359
514
|
decompress_bz2(fname, extract_path)
|
|
515
|
+
elif fname.endswith(".whl"):
|
|
516
|
+
unzip(fname, extract_path)
|
|
517
|
+
elif fname.endswith(".crate"):
|
|
518
|
+
with contextlib.closing(tarfile.open(fname, "r:gz")) as t:
|
|
519
|
+
t.extractall(path=extract_path)
|
|
360
520
|
else:
|
|
361
521
|
is_compressed_file = False
|
|
362
522
|
if compressed_only:
|
|
@@ -443,5 +603,51 @@ def gem_download(link, target_dir, checkout_to):
|
|
|
443
603
|
return success
|
|
444
604
|
|
|
445
605
|
|
|
606
|
+
def main():
    """Command-line entry point of the FOSSLight downloader.

    Parses the options, prints the help message when requested or when no
    source link is given, and otherwise hands the values to
    cli_download_and_extract.
    """
    parser = argparse.ArgumentParser(description='FOSSLight Downloader', prog='fosslight_download', add_help=False)
    parser.add_argument('-h', '--help', help='Print help message', action='store_true', dest='help')
    parser.add_argument('-s', '--source', help='Source link to download', type=str, dest='source')
    parser.add_argument('-t', '--target_dir', help='Target directory', type=str, dest='target_dir', default="")
    parser.add_argument('-d', '--log_dir', help='Directory to save log file', type=str, dest='log_dir', default="")
    parser.add_argument('-c', '--checkout_to', help='Checkout to branch or tag', type=str, dest='checkout_to', default="")
    parser.add_argument('-z', '--compressed_only', help='Unzip only compressed file',
                        action='store_true', dest='compressed_only', default=False)
    parser.add_argument('-o', '--output', help='Generate output file', action='store_true', dest='output', default=False)

    # Both directories default to the current working directory.
    default_target_dir = os.getcwd()
    default_log_dir = os.getcwd()

    try:
        args = parser.parse_args()
    except SystemExit:
        sys.exit(0)

    if args.help:
        print_help_msg_download()

    # Empty or missing option values fall back to the defaults.
    src_link = args.source if args.source else ""
    target_dir = args.target_dir if args.target_dir else default_target_dir
    log_dir = args.log_dir if args.log_dir else default_log_dir
    checkout_to = args.checkout_to if args.checkout_to else ""
    compressed_only = args.compressed_only if args.compressed_only else False
    output = args.output if args.output else False

    if not src_link:
        print_help_msg_download()
        return

    cli_download_and_extract(src_link, target_dir, log_dir, checkout_to,
                             compressed_only, ssh_key="", id="", git_token="",
                             called_cli=False, output=output)
|
|
650
|
+
|
|
651
|
+
|
|
446
652
|
if __name__ == '__main__':
|
|
447
653
|
main()
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Copyright (c) 2025 LG Electronics Inc.
|
|
4
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import fnmatch
|
|
8
|
+
from typing import List
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def excluding_files(patterns: List[str], path_to_scan: str) -> List[str]:
    """Return the files under *path_to_scan* that match any exclude pattern.

    A directory is excluded (together with every file below it) when its name
    or its path relative to *path_to_scan* matches a pattern. A file is
    excluded when its relative path matches a pattern. Matching uses fnmatch.

    :param patterns: glob patterns; backslashes are treated as slashes, and a
                     trailing "/" or "/*" only marks "this directory".
    :param path_to_scan: root directory to walk.
    :return: sorted list of excluded file paths, relative to *path_to_scan*
             and written with forward slashes.
    """
    excluded_paths = set()

    # Normalize patterns: e.g., 'sample/', 'sample/*' -> 'sample'.
    # Only the trailing "/" and "/*" suffixes are removed; a wildcard that is
    # part of the last component ('build*/' -> 'build*') must survive.
    normalized_patterns = []
    for pattern in patterns:
        pattern = pattern.replace('\\', '/')
        if pattern.endswith('/'):
            pattern = pattern.rstrip('/')
        if pattern.endswith('/*'):
            pattern = pattern[:-2].rstrip('/')
        normalized_patterns.append(pattern)

    def rel_posix(path: str) -> str:
        # Path relative to the scan root, always with forward slashes.
        return os.path.relpath(path, path_to_scan).replace('\\', '/')

    for root, dirs, files in os.walk(path_to_scan):
        # (1) Directory matching: by bare name or by relative path.
        pruned = []
        for d in dirs:
            full_dir = os.path.join(root, d)
            dir_path = rel_posix(full_dir)
            matched = any(
                fnmatch.fnmatch(d, pat)
                or fnmatch.fnmatch(dir_path, pat)
                or fnmatch.fnmatch(dir_path, pat + "/*")
                for pat in normalized_patterns
            )
            if matched:
                # Exclude every file below the matched directory.
                for sub_root, _, sub_files in os.walk(full_dir):
                    for sub_file in sub_files:
                        excluded_paths.add(rel_posix(os.path.join(sub_root, sub_file)))
                pruned.append(d)

        # (1-2) Prune matched directories in place so os.walk skips them.
        for d in pruned:
            dirs.remove(d)

        # (2) File matching on the relative path.
        for f in files:
            file_path = rel_posix(os.path.join(root, f))
            if any(fnmatch.fnmatch(file_path, pat) or fnmatch.fnmatch(file_path, pat + "/*")
                   for pat in normalized_patterns):
                excluded_paths.add(file_path)

    return sorted(excluded_paths)
|
fosslight_util/help.py
CHANGED
|
@@ -3,7 +3,10 @@
|
|
|
3
3
|
# Copyright (c) 2021 LG Electronics Inc.
|
|
4
4
|
# SPDX-License-Identifier: Apache-2.0
|
|
5
5
|
import sys
|
|
6
|
-
|
|
6
|
+
try:
|
|
7
|
+
from importlib.metadata import version, PackageNotFoundError
|
|
8
|
+
except ImportError:
|
|
9
|
+
from importlib_metadata import version, PackageNotFoundError # Python <3.8
|
|
7
10
|
|
|
8
11
|
_HELP_MESSAGE_COMMON = """
|
|
9
12
|
_______ _______ _______ _______ ___ ___ __
|
|
@@ -31,7 +34,12 @@ _HELP_MESSAGE_DOWNLOAD = """
|
|
|
31
34
|
Optional:
|
|
32
35
|
-h\t\t Print help message
|
|
33
36
|
-t\t\t Output path name
|
|
34
|
-
-d\t\t Directory name to save the log file
|
|
37
|
+
-d\t\t Directory name to save the log file
|
|
38
|
+
-s\t\t Source link to download
|
|
39
|
+
-t\t\t Directory to download source code
|
|
40
|
+
-c\t\t Checkout to branch or tag/ or version
|
|
41
|
+
-z\t\t Unzip only compressed file
|
|
42
|
+
-o\t\t Generate summary output file with this option"""
|
|
35
43
|
|
|
36
44
|
|
|
37
45
|
class PrintHelpMsg():
|
|
@@ -50,7 +58,10 @@ class PrintHelpMsg():
|
|
|
50
58
|
def print_package_version(pkg_name: str, msg: str = "", exitopt: bool = True) -> str:
|
|
51
59
|
if msg == "":
|
|
52
60
|
msg = f"{pkg_name} Version:"
|
|
53
|
-
|
|
61
|
+
try:
|
|
62
|
+
cur_version = version(pkg_name)
|
|
63
|
+
except PackageNotFoundError:
|
|
64
|
+
cur_version = "unknown"
|
|
54
65
|
|
|
55
66
|
if exitopt:
|
|
56
67
|
print(f'{msg} {cur_version}')
|