fosslight-util 1.4.34__tar.gz → 1.4.36__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/PKG-INFO +1 -1
  2. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/requirements.txt +3 -2
  3. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/setup.py +1 -1
  4. fosslight_util-1.4.36/src/fosslight_util/_get_downloadable_url.py +249 -0
  5. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util/download.py +34 -8
  6. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util/read_excel.py +10 -8
  7. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util/write_excel.py +4 -0
  8. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util/write_scancodejson.py +3 -1
  9. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util.egg-info/PKG-INFO +1 -1
  10. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util.egg-info/requires.txt +2 -1
  11. fosslight_util-1.4.34/src/fosslight_util/_get_downloadable_url.py +0 -168
  12. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/LICENSE +0 -0
  13. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/MANIFEST.in +0 -0
  14. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/README.md +0 -0
  15. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/setup.cfg +0 -0
  16. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util/__init__.py +0 -0
  17. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util/compare_yaml.py +0 -0
  18. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util/constant.py +0 -0
  19. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util/convert_excel_to_yaml.py +0 -0
  20. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util/correct.py +0 -0
  21. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util/help.py +0 -0
  22. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util/oss_item.py +0 -0
  23. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util/output_format.py +0 -0
  24. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util/parsing_yaml.py +0 -0
  25. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util/resources/frequentLicenselist.json +0 -0
  26. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util/resources/frequent_license_nick_list.json +0 -0
  27. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util/resources/licenses.json +0 -0
  28. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util/set_log.py +0 -0
  29. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util/spdx_licenses.py +0 -0
  30. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util/timer_thread.py +0 -0
  31. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util/write_opossum.py +0 -0
  32. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util/write_spdx.py +0 -0
  33. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util/write_txt.py +0 -0
  34. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util/write_yaml.py +0 -0
  35. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util.egg-info/SOURCES.txt +0 -0
  36. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util.egg-info/dependency_links.txt +0 -0
  37. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util.egg-info/entry_points.txt +0 -0
  38. {fosslight_util-1.4.34 → fosslight_util-1.4.36}/src/fosslight_util.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: fosslight_util
3
- Version: 1.4.34
3
+ Version: 1.4.36
4
4
  Summary: FOSSLight Util
5
5
  Home-page: https://github.com/fosslight/fosslight_util
6
6
  Author: LG Electronics
@@ -10,7 +10,8 @@ python3-wget
10
10
  beautifulsoup4
11
11
  jsonmerge
12
12
  spdx-tools==0.7.0rc0
13
- npm
14
13
  setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability
15
14
  numpy; python_version < '3.8'
16
- numpy>=1.22.2; python_version >= '3.8'
15
+ numpy>=1.22.2; python_version >= '3.8'
16
+ npm
17
+ requests
@@ -14,7 +14,7 @@ with open('requirements.txt', 'r', 'utf-8') as f:
14
14
  if __name__ == "__main__":
15
15
  setup(
16
16
  name='fosslight_util',
17
- version='1.4.34',
17
+ version='1.4.36',
18
18
  package_dir={"": "src"},
19
19
  packages=find_packages(where='src'),
20
20
  description='FOSSLight Util',
@@ -0,0 +1,249 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ # Copyright (c) 2020 LG Electronics Inc.
4
+ # SPDX-License-Identifier: Apache-2.0
5
+ import logging
6
+ import re
7
+ import requests
8
+ from npm.bindings import npm_run
9
+ from lastversion import latest
10
+ from bs4 import BeautifulSoup
11
+ from urllib.request import urlopen
12
+ import fosslight_util.constant as constant
13
+
14
+ logger = logging.getLogger(constant.LOGGER_NAME)
15
+
16
+
17
def extract_name_version_from_link(link):
    """Derive the OSS name, version and package type from a package-page URL.

    Supported URL shapes (first matching pattern wins):
      pypi      : https://pypi.org/project/(oss_name)/(version)
      pypi2     : https://files.pythonhosted.org/packages/source/(alphabet)/(oss_name)/(oss_name)-(version).tar.gz
      maven     : https://mvnrepository.com/artifact/(group)/(artifact)/(version)
      npm       : https://www.npmjs.com/package/(package)/v/(version)
      npm2      : https://www.npmjs.com/package/@(group)/(package)/v/(version)
      pub       : https://pub.dev/packages/(package)/versions/(version)
      pods      : https://cocoapods.org/pods/(package)

    When the URL carries no version and the package type supports it, the
    latest version is looked up and the returned link is rewritten to point
    at that version.

    Returns:
        tuple: (oss_name, oss_version, link, pkg_type). pkg_type is the key of
        the pattern that matched, or "" when the link matched no pattern.
    """
    pkg_pattern = {
        "pypi": r'https?:\/\/pypi\.org\/project\/([^\/]+)[\/]?([^\/]*)',
        "pypi2": r'https?:\/\/files\.pythonhosted\.org\/packages\/source\/[\w]\/([^\/]+)\/[\S]+-([^\-]+)\.tar\.gz',
        "maven": r'https?:\/\/mvnrepository\.com\/artifact\/([^\/]+)\/([^\/]+)\/?([^\/]*)',
        "npm": r'https?:\/\/www\.npmjs\.com\/package\/([^\/\@]+)(?:\/v\/)?([^\/]*)',
        "npm2": r'https?:\/\/www\.npmjs\.com\/package\/(\@[^\/]+\/[^\/]+)(?:\/v\/)?([^\/]*)',
        "pub": r'https?:\/\/pub\.dev\/packages\/([^\/]+)(?:\/versions\/)?([^\/]*)',
        "pods": r'https?:\/\/cocoapods\.org\/pods\/([^\/]+)'
    }
    oss_name = ""
    oss_version = ""
    # Stays "" unless a pattern matches, so callers never see a stale loop key.
    matched_key = ""
    if link.startswith("www."):
        link = link.replace("www.", "https://www.", 1)
    for key, value in pkg_pattern.items():
        match = re.match(value, link)
        if not match:
            continue
        matched_key = key
        # Every pattern defines group 1, so this is bound before any later use.
        origin_name = match.group(1)
        try:
            if key in ("pypi", "pypi2"):
                oss_name = f"pypi:{origin_name}"
                # Collapse runs of '-', '_' and '.' and lower-case the name.
                oss_name = re.sub(r"[-_.]+", "-", oss_name).lower()
                oss_version = match.group(2)
            elif key == "maven":
                artifact = match.group(2)
                oss_name = f"{origin_name}:{artifact}"
                # The latest-version lookup below is keyed on group:artifact.
                origin_name = oss_name
                oss_version = match.group(3)
            elif key in ("npm", "npm2"):
                oss_name = f"npm:{origin_name}"
                oss_version = match.group(2)
            elif key == "pub":
                oss_name = f"pub:{origin_name}"
                oss_version = match.group(2)
            elif key == "pods":
                oss_name = f"cocoapods:{origin_name}"
        except Exception as ex:
            logger.info(f"extract_name_version_from_link {key}:{ex}")
        if oss_name and (not oss_version):
            if key in ["pypi", "maven", "npm", "npm2", "pub"]:
                oss_version, link = get_latest_package_version(link, key, origin_name)
                logger.debug(f'Try to download with the latest version:{link}')
        break
    return oss_name, oss_version, link, matched_key
70
+
71
+
72
def get_latest_package_version(link, pkg_type, oss_name):
    """Look up the latest published version of a package.

    Args:
        link: Original package URL; returned unchanged when no version is found.
        pkg_type: One of 'npm', 'npm2', 'pypi', 'maven', 'pub'. Any other value
            performs no lookup.
        oss_name: Package name as used by the registry (for maven,
            "group:artifact").

    Returns:
        tuple: (find_version, link_with_version). find_version is '' and
        link_with_version equals link when the lookup fails or is unsupported.
    """
    # Bound every HTTP call so an unresponsive registry cannot hang the caller.
    request_timeout = 30
    find_version = ''
    link_with_version = link

    try:
        if pkg_type in ['npm', 'npm2']:
            # NOTE(review): unpacking order (stderr, stdout) is taken from the
            # existing call site - confirm against the npm bindings.
            stderr, stdout = npm_run('view', oss_name, 'version')
            if stdout:
                find_version = stdout.strip()
                link_with_version = f'https://www.npmjs.com/package/{oss_name}/v/{find_version}'
        elif pkg_type == 'pypi':
            find_version = str(latest(oss_name, at='pip', output_format='version', pre_ok=True))
            link_with_version = f'https://pypi.org/project/{oss_name}/{find_version}'
        elif pkg_type == 'maven':
            maven_response = requests.get(
                f'https://api.deps.dev/v3alpha/systems/maven/packages/{oss_name}',
                timeout=request_timeout)
            if maven_response.status_code == 200:
                # The last entry of 'versions' is used as the latest one.
                find_version = maven_response.json().get('versions')[-1].get('versionKey').get('version')
                oss_name = oss_name.replace(':', '/')
                link_with_version = f'https://mvnrepository.com/artifact/{oss_name}/{find_version}'
        elif pkg_type == 'pub':
            pub_response = requests.get(
                f'https://pub.dev/api/packages/{oss_name}',
                timeout=request_timeout)
            if pub_response.status_code == 200:
                find_version = pub_response.json().get('latest').get('version')
                link_with_version = f'https://pub.dev/packages/{oss_name}/versions/{find_version}'
    except Exception as e:
        # Best effort: any failure leaves the original link in place.
        logger.debug(f'Fail to get latest package version({link}:{e})')
    return find_version, link_with_version
99
+
100
+
101
def get_downloadable_url(link):
    """Resolve a package-page link to a direct download URL.

    Returns:
        tuple: (ret, result_link, oss_name, oss_version). When no resolver
        applies, ret is False and result_link is the link passed in.
    """
    oss_name, oss_version, candidate, pkg_type = extract_name_version_from_link(link)
    # The per-registry resolvers expect the link without its scheme.
    for scheme in ('http://', 'https://'):
        candidate = candidate.replace(scheme, '')

    if pkg_type == "pypi":
        resolver = get_download_location_for_pypi
    elif pkg_type == "maven" or candidate.startswith('repo1.maven.org/'):
        resolver = get_download_location_for_maven
    elif (pkg_type in ["npm", "npm2"]) or candidate.startswith('registry.npmjs.org/'):
        resolver = get_download_location_for_npm
    elif pkg_type == "pub":
        resolver = get_download_location_for_pub
    else:
        # Unknown package type: hand the original link back untouched.
        return False, link, oss_name, oss_version

    found, download_link = resolver(candidate)
    return found, download_link, oss_name, oss_version
120
+
121
+
122
def get_download_location_for_pypi(link):
    """Find the source-distribution URL on a PyPI project page.

    Fetches pypi.org/project/(oss_name)/(oss_version)/#files and returns the
    href of the first file card whose <code> label is "source".

    Args:
        link: Scheme-less PyPI link, e.g. pypi.org/project/(name)/(version).

    Returns:
        tuple: (ret, new_link); (False, '') when nothing was found or the
        page could not be read.
    """
    ret = False
    new_link = ''

    try:
        dn_loc_re = re.findall(r'pypi\.org\/project\/?([^\/]*)\/?([^\/]*)', link)
        oss_name, oss_version = dn_loc_re[0]

        pypi_url = 'https://pypi.org/project/' + oss_name + '/' + oss_version + '/#files'

        # Close the HTTP response deterministically and never wait forever.
        with urlopen(pypi_url, timeout=30) as response:
            content = response.read().decode('utf8')
        bs_obj = BeautifulSoup(content, 'html.parser')

        for card_file in bs_obj.find_all('div', {'class': 'card file__card'}):
            code_tag = card_file.find('code')
            anchor = card_file.find('a')
            # Skip malformed cards instead of aborting the whole scan.
            if code_tag is None or anchor is None:
                continue
            if code_tag.text == "source" and anchor.get('href'):
                new_link = anchor.attrs['href']
                ret = True
                break
    except Exception as error:
        ret = False
        logger.warning('Cannot find the link for pypi (url:'+link+') '+str(error))

    return ret, new_link
150
+
151
+
152
def get_download_location_for_maven(link):
    """Find a source archive URL for a Maven artifact.

    Accepts either a mvnrepository.com/artifact/(group)/(artifact)/(version)
    link or a repo1.maven.org/maven2/... link (both without scheme). A repo1
    link that already points at an archive is returned as-is; otherwise the
    repo1 directory listing is scanned for a sources jar.

    Returns:
        tuple: (ret, new_link); (False, '') when no source archive was found.
    """
    ret = False
    new_link = ''

    try:
        if link.startswith('mvnrepository.com/artifact/'):
            dn_loc_split = link.replace('mvnrepository.com/', '').split('/')
            # repo1 lays groups out as directories: org.foo -> org/foo
            group_id = dn_loc_split[1].replace('.', '/')
            dn_loc = 'https://repo1.maven.org/maven2/' + group_id + '/' + dn_loc_split[2] + '/' + dn_loc_split[3]

        elif link.startswith('repo1.maven.org/maven2/'):
            if link.endswith(('.tar.gz', '.jar', '.tar.xz')):
                # Already a direct archive link.
                return True, 'https://' + link
            dn_loc = 'https://' + link
        else:
            raise Exception("not valid url for maven")

        # Close the HTTP response deterministically and never wait forever.
        with urlopen(dn_loc, timeout=30) as response:
            html = response.read().decode('utf8')
        bs_obj = BeautifulSoup(html, 'html.parser')

        # Conventional name: (artifact)-(version)-sources.jar
        file_name = dn_loc.split('/')[-2] + '-' + dn_loc.split('/')[-1] + '-sources.jar'

        # The loop variable must not reuse the name 'link': the except block
        # below formats the original string argument into its message.
        for anchor in bs_obj.find_all("a"):
            href = anchor.get('href', '')
            if not href:
                continue
            if anchor.text == file_name:
                new_link = dn_loc + '/' + href
                break
            elif href.endswith(('sources.jar', 'source.jar', 'src.jar')):
                # Fallback candidate; keep scanning for the exact file name.
                new_link = dn_loc + '/' + href

        if new_link != '':
            ret = True

    except Exception as error:
        ret = False
        logger.warning('Cannot find the link for maven (url:'+link+') '+str(error))

    return ret, new_link
198
+
199
+
200
def get_download_location_for_npm(link):
    """Build the npm registry tarball URL for a package link.

    Tarball format: registry.npmjs.org/packagename/-/packagename-version.tgz

    Args:
        link: Scheme-less link starting with www.npmjs.com/ or
            registry.npmjs.org/. '%40' is decoded to '@' for scoped packages.

    Returns:
        tuple: (ret, new_link); (False, '') when the link is not an npm link
        or carries no version segment.
    """
    ret = False
    new_link = ''
    oss_version = ""
    oss_name_npm = ""
    tar_name = ""

    link = link.replace('%40', '@')
    if link.startswith('registry.npmjs.org/') and link.endswith('.tgz'):
        # Already a direct tarball link; re-deriving name and version from it
        # would append the file name a second time.
        return True, f'https://{link}'

    if link.startswith('www.npmjs.com/') or link.startswith('registry.npmjs.org/'):
        try:
            dn_loc_split = link.split('/')
            # www.npmjs.com links carry an extra "package" path segment.
            if dn_loc_split[1] == 'package':
                idx = 2
            else:
                idx = 1
            if dn_loc_split[idx].startswith('@'):
                # Scoped package: @scope/name/v/version
                oss_name_npm = dn_loc_split[idx]+'/'+dn_loc_split[idx+1]
                tar_name = dn_loc_split[idx+1]
                oss_version = dn_loc_split[idx+3]
            else:
                # Unscoped package: name/v/version
                oss_name_npm = dn_loc_split[idx]
                tar_name = oss_name_npm
                oss_version = dn_loc_split[idx+2]

            tar_name = f'{tar_name}-{oss_version}'
            new_link = f'https://registry.npmjs.org/{oss_name_npm}/-/{tar_name}.tgz'
            ret = True
        except Exception as error:
            ret = False
            logger.warning('Cannot find the link for npm (url:'+link+') '+str(error))
    return ret, new_link
232
+
233
+
234
def get_download_location_for_pub(link):
    """Turn a scheme-less pub.dev version link into its archive URL."""
    # url format : https://pub.dev/packages/(oss_name)/versions/(oss_version)
    # download url format : https://pub.dev/packages/(oss_name)/versions/(oss_version).tar.gz
    found = False
    archive_url = ''

    try:
        found = link.startswith('pub.dev/packages')
        if found:
            archive_url = f'https://{link}.tar.gz'
    except Exception as error:
        found = False
        logger.warning('Cannot find the link for pub (url:'+link+') '+str(error))

    return found, archive_url
@@ -24,6 +24,7 @@ import time
24
24
  import threading
25
25
  import platform
26
26
  import subprocess
27
+ import re
27
28
 
28
29
  logger = logging.getLogger(constant.LOGGER_NAME)
29
30
  compression_extension = {".tar.bz2", ".tar.gz", ".tar.xz", ".tgz", ".tar", ".zip", ".jar", ".bz2"}
@@ -115,6 +116,8 @@ def cli_download_and_extract(link, target_dir, log_dir, checkout_to="", compress
115
116
 
116
117
  success = True
117
118
  msg = ""
119
+ oss_name = ""
120
+ oss_version = ""
118
121
  log_file_name = "fosslight_download_" + \
119
122
  datetime.now().strftime('%Y%m%d_%H-%M-%S')+".txt"
120
123
  logger, log_item = init_log(os.path.join(log_dir, log_file_name))
@@ -135,22 +138,29 @@ def cli_download_and_extract(link, target_dir, log_dir, checkout_to="", compress
135
138
  is_rubygems = src_info.get("rubygems", False)
136
139
 
137
140
  # General download (git clone, wget)
138
- if (not is_rubygems) and (not download_git_clone(link, target_dir, checkout_to, tag, branch)):
141
+ success_git, msg, oss_name = download_git_clone(link, target_dir, checkout_to, tag, branch)
142
+ if (not is_rubygems) and (not success_git):
139
143
  if os.path.isfile(target_dir):
140
144
  shutil.rmtree(target_dir)
141
145
 
142
- success, downloaded_file = download_wget(link, target_dir, compressed_only)
146
+ success, downloaded_file, msg_wget, oss_name, oss_version = download_wget(link, target_dir, compressed_only)
143
147
  if success:
144
148
  success = extract_compressed_file(downloaded_file, target_dir, True)
145
149
  # Download from rubygems.org
146
150
  elif is_rubygems and shutil.which("gem"):
147
151
  success = gem_download(link, target_dir, checkout_to)
152
+ if msg:
153
+ msg = f'git fail: {msg}'
154
+ if msg_wget:
155
+ msg = f'{msg}, wget fail: {msg_wget}'
156
+ else:
157
+ msg = f'{msg}, wget success'
148
158
  except Exception as error:
149
159
  success = False
150
160
  msg = str(error)
151
161
 
152
- logger.info(f"\n* FOSSLight Downloader - Result: {success}\n {msg}")
153
- return success, msg
162
+ logger.info(f"\n* FOSSLight Downloader - Result: {success} ({msg})")
163
+ return success, msg, oss_name, oss_version
154
164
 
155
165
 
156
166
  def get_ref_to_checkout(checkout_to, ref_list):
@@ -184,8 +194,19 @@ def decide_checkout(checkout_to="", tag="", branch=""):
184
194
  return ref_to_checkout
185
195
 
186
196
 
197
def get_github_ossname(link):
    """Return "(owner)-(repo)" for a GitHub repository URL, else "".

    A trailing ".git" is dropped, and anything after the repository segment
    (further path, query string or fragment) is ignored. Dots inside the
    repository name (e.g. "socket.io") are kept.
    """
    oss_name = ""
    # The repo group is lazy so that an optional ".git" suffix is excluded
    # from it while other dots in the name are preserved.
    p = re.compile(r'https?://github\.com/([^/]+)/([^/?#]+?)(?:\.git)?(?:[/?#]|$)')
    match = p.match(link)
    if match:
        oss_name = f"{match.group(1)}-{match.group(2)}"
    return oss_name
204
+
205
+
187
206
  def download_git_clone(git_url, target_dir, checkout_to="", tag="", branch=""):
188
207
  ref_to_checkout = decide_checkout(checkout_to, tag, branch)
208
+ msg = ""
209
+ oss_name = get_github_ossname(git_url)
189
210
 
190
211
  if platform.system() != "Windows":
191
212
  signal.signal(signal.SIGALRM, alarm_handler)
@@ -204,7 +225,8 @@ def download_git_clone(git_url, target_dir, checkout_to="", tag="", branch=""):
204
225
  del alarm
205
226
  except Exception as error:
206
227
  logger.warning(f"git clone - failed: {error}")
207
- return False
228
+ msg = str(error)
229
+ return False, msg, oss_name
208
230
  try:
209
231
  if ref_to_checkout != "":
210
232
  ref_list = [x for x in repo.references]
@@ -213,11 +235,14 @@ def download_git_clone(git_url, target_dir, checkout_to="", tag="", branch=""):
213
235
  repo.checkout(ref_to_checkout)
214
236
  except Exception as error:
215
237
  logger.warning(f"git checkout to {ref_to_checkout} - failed: {error}")
216
- return True
238
+ return True, msg, oss_name
217
239
 
218
240
 
219
241
  def download_wget(link, target_dir, compressed_only):
220
242
  success = False
243
+ msg = ""
244
+ oss_name = ""
245
+ oss_version = ""
221
246
  downloaded_file = ""
222
247
  if platform.system() != "Windows":
223
248
  signal.signal(signal.SIGALRM, alarm_handler)
@@ -228,7 +253,7 @@ def download_wget(link, target_dir, compressed_only):
228
253
  try:
229
254
  Path(target_dir).mkdir(parents=True, exist_ok=True)
230
255
 
231
- ret, new_link = get_downloadable_url(link)
256
+ ret, new_link, oss_name, oss_version = get_downloadable_url(link)
232
257
  if ret and new_link:
233
258
  link = new_link
234
259
 
@@ -255,9 +280,10 @@ def download_wget(link, target_dir, compressed_only):
255
280
  logger.debug(f"wget - downloaded: {downloaded_file}")
256
281
  except Exception as error:
257
282
  success = False
283
+ msg = str(error)
258
284
  logger.warning(f"wget - failed: {error}")
259
285
 
260
- return success, downloaded_file
286
+ return success, downloaded_file, msg, oss_name, oss_version
261
287
 
262
288
 
263
289
  def extract_compressed_dir(src_dir, target_dir, remove_after_extract=True):
@@ -3,6 +3,7 @@
3
3
  # Copyright (c) 2021 LG Electronics Inc.
4
4
  # SPDX-License-Identifier: Apache-2.0
5
5
  import logging
6
+ from typing import List, Dict, Any
6
7
  import xlrd
7
8
  import json
8
9
  from fosslight_util.constant import LOGGER_NAME
@@ -12,17 +13,18 @@ from fosslight_util.parsing_yaml import set_value_switch
12
13
  logger = logging.getLogger(LOGGER_NAME)
13
14
  IDX_CANNOT_FOUND = -1
14
15
  PREFIX_BIN = "bin"
16
+ SHEET_PREFIX_TO_READ = ["bin", "bom", "src"]
15
17
  xlrd.xlsx.ensure_elementtree_imported(False, None)
16
18
  xlrd.xlsx.Element_has_iter = True
17
19
 
18
20
 
19
- def read_oss_report(excel_file, sheet_names=""):
20
- _oss_report_items = []
21
- xl_sheets = {}
22
- all_sheet_to_read = []
23
- not_matched_sheet = []
21
+ def read_oss_report(excel_file: str, sheet_names: str = "") -> List[OssItem]:
22
+ oss_report_items: List[OssItem] = []
23
+ xl_sheets: Dict[str, Any] = {}
24
+ all_sheet_to_read: List[str] = []
25
+ not_matched_sheet: List[str] = []
24
26
  any_sheet_matched = False
25
- SHEET_PREFIX_TO_READ = ["bin", "bom", "src"]
27
+
26
28
  if sheet_names:
27
29
  sheet_name_prefix_match = False
28
30
  sheet_name_to_read = sheet_names.split(",")
@@ -113,8 +115,8 @@ def read_oss_report(excel_file, sheet_names=""):
113
115
  else:
114
116
  valid_row = False if cell_value == "-" else True
115
117
  if valid_row and load_data_cnt > 0:
116
- _oss_report_items.append(item)
118
+ oss_report_items.append(item)
117
119
 
118
120
  except Exception as error:
119
121
  logger.error(f"Parsing a OSS Report: {error}")
120
- return _oss_report_items
122
+ return oss_report_items
@@ -129,6 +129,10 @@ def write_result_to_csv(output_file, sheet_list_origin, separate_sheet=False, ex
129
129
  row_num = 1
130
130
  header_row, sheet_content_without_header = get_header_row(sheet_name, sheet_contents[:], extended_header)
131
131
 
132
+ if 'Copyright Text' in header_row:
133
+ idx = header_row.index('Copyright Text')-1
134
+ for item in sheet_content_without_header:
135
+ item[idx] = item[idx].replace('\n', ', ')
132
136
  if not separate_sheet:
133
137
  merge_sheet.extend(sheet_content_without_header)
134
138
  if sheet_name == list(sheet_list.keys())[-1]:
@@ -7,12 +7,14 @@ import logging
7
7
  import os
8
8
  import json
9
9
  import fosslight_util.constant as constant
10
+ from fosslight_util.oss_item import OssItem
11
+ from typing import List
10
12
 
11
13
  logger = logging.getLogger(constant.LOGGER_NAME)
12
14
  EMPTY_FILE_PATH = '-'
13
15
 
14
16
 
15
- def write_scancodejson(output_dir, output_filename, oss_list):
17
+ def write_scancodejson(output_dir: str, output_filename: str, oss_list: List[OssItem]):
16
18
  json_output = {}
17
19
  json_output['headers'] = []
18
20
  json_output['summary'] = {}
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: fosslight-util
3
- Version: 1.4.34
3
+ Version: 1.4.36
4
4
  Summary: FOSSLight Util
5
5
  Home-page: https://github.com/fosslight/fosslight_util
6
6
  Author: LG Electronics
@@ -10,8 +10,9 @@ python3-wget
10
10
  beautifulsoup4
11
11
  jsonmerge
12
12
  spdx-tools==0.7.0rc0
13
- npm
14
13
  setuptools>=65.5.1
14
+ npm
15
+ requests
15
16
 
16
17
  [:python_version < "3.8"]
17
18
  numpy
@@ -1,168 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
- # Copyright (c) 2020 LG Electronics Inc.
4
- # SPDX-License-Identifier: Apache-2.0
5
- import logging
6
- import re
7
- from bs4 import BeautifulSoup
8
- from urllib.request import urlopen
9
- import fosslight_util.constant as constant
10
- from npm.bindings import npm_run
11
-
12
- logger = logging.getLogger(constant.LOGGER_NAME)
13
-
14
-
15
- def get_downloadable_url(link):
16
-
17
- ret = False
18
- new_link = ''
19
-
20
- link = link.replace('http://', '')
21
- link = link.replace('https://', '')
22
-
23
- if link.startswith('pypi.org/'):
24
- ret, new_link = get_download_location_for_pypi(link)
25
- elif link.startswith('mvnrepository.com/artifact/') or link.startswith('repo1.maven.org/'):
26
- ret, new_link = get_download_location_for_maven(link)
27
- elif link.startswith('www.npmjs.com/') or link.startswith('registry.npmjs.org/'):
28
- ret, new_link = get_download_location_for_npm(link)
29
- elif link.startswith('pub.dev/'):
30
- ret, new_link = get_download_location_for_pub(link)
31
-
32
- return ret, new_link
33
-
34
-
35
- def get_download_location_for_pypi(link):
36
- # get the url for downloading source file in pypi.org/project/(oss_name)/(oss_version)/#files
37
- ret = False
38
- new_link = ''
39
-
40
- try:
41
- dn_loc_re = re.findall(r'pypi.org\/project\/?([^\/]*)\/?([^\/]*)', link)
42
- oss_name = dn_loc_re[0][0]
43
- oss_version = dn_loc_re[0][1]
44
-
45
- pypi_url = 'https://pypi.org/project/' + oss_name + '/' + oss_version + '/#files'
46
-
47
- content = urlopen(pypi_url).read().decode('utf8')
48
- bs_obj = BeautifulSoup(content, 'html.parser')
49
-
50
- card_file_list = bs_obj.findAll('div', {'class': 'card file__card'})
51
-
52
- for card_file in card_file_list:
53
- file_code = card_file.find('code').text
54
- if file_code == "source":
55
- new_link = card_file.find('a').attrs['href']
56
- ret = True
57
- break
58
- except Exception as error:
59
- ret = False
60
- logger.warning('Cannot find the link for pypi (url:'+link+') '+str(error))
61
-
62
- return ret, new_link
63
-
64
-
65
- def get_download_location_for_maven(link):
66
- # get the url for downloading source file in
67
- # repo1.maven.org/maven2/(group_id(split to separator '/'))/(artifact_id)/(oss_version)
68
- ret = False
69
- new_link = ''
70
-
71
- try:
72
- if link.startswith('mvnrepository.com/artifact/'):
73
- dn_loc_split = link.replace('mvnrepository.com/', '').split('/')
74
- group_id = dn_loc_split[1].replace('.', '/')
75
- dn_loc = 'https://repo1.maven.org/maven2/' + group_id + '/' + dn_loc_split[2] + '/' + dn_loc_split[3]
76
-
77
- elif link.startswith('repo1.maven.org/maven2/'):
78
- dn_loc_split = link.replace('repo1.maven.org/maven2/', '').split('/')
79
-
80
- if link.endswith('.tar.gz') or link.endswith('.jar') or link.endswith('.tar.xz'):
81
- new_link = 'https://' + link
82
- ret = True
83
- return ret, new_link
84
- else:
85
- dn_loc = 'https://' + link
86
- else:
87
- raise Exception("not valid url for maven")
88
-
89
- html = urlopen(dn_loc).read().decode('utf8')
90
- bs_obj = BeautifulSoup(html, 'html.parser')
91
-
92
- file_name = dn_loc.split('/')[-2] + '-' + dn_loc.split('/')[-1] + '-sources.jar'
93
-
94
- for link in bs_obj.findAll("a"):
95
- if link.text == file_name:
96
- source_url = link['href']
97
- new_link = dn_loc + '/' + source_url
98
- break
99
- elif link['href'].endswith('sources.jar') or link['href'].endswith('source.jar') or link['href'].endswith('src.jar'):
100
- source_url = link['href']
101
- new_link = dn_loc + '/' + source_url
102
-
103
- if new_link != '':
104
- ret = True
105
-
106
- except Exception as error:
107
- ret = False
108
- logger.warning('Cannot find the link for maven (url:'+link+') '+str(error))
109
-
110
- return ret, new_link
111
-
112
-
113
- def get_download_location_for_npm(link):
114
- # url format : registry.npmjs.org/packagename/-/packagename-version.tgz
115
- ret = False
116
- new_link = ''
117
- oss_version = ""
118
- oss_name_npm = ""
119
- tar_name = ""
120
-
121
- if link.startswith('www.npmjs.com/') or link.startswith('registry.npmjs.org/'):
122
- try:
123
- dn_loc_split = link.split('/')
124
- if dn_loc_split[1] == 'package':
125
- idx = 2
126
- else:
127
- idx = 1
128
- if dn_loc_split[idx].startswith('@'):
129
- oss_name_npm = dn_loc_split[idx]+'/'+dn_loc_split[idx+1]
130
- tar_name = dn_loc_split[idx+1]
131
- oss_version = dn_loc_split[idx+3]
132
- else:
133
- oss_name_npm = dn_loc_split[idx]
134
- tar_name = oss_name_npm
135
- oss_version = dn_loc_split[idx+2]
136
- except Exception:
137
- pass
138
-
139
- try:
140
- if not oss_version:
141
- stderr, stdout = npm_run('view', oss_name_npm, 'version')
142
- if stdout:
143
- oss_version = stdout.strip()
144
- tar_name = f"{tar_name}-{oss_version}"
145
- new_link = 'https://registry.npmjs.org/' + oss_name_npm + '/-/' + tar_name + '.tgz'
146
- ret = True
147
- except Exception as error:
148
- ret = False
149
- logger.warning('Cannot find the link for npm (url:'+link+') '+str(error))
150
- return ret, new_link
151
-
152
-
153
- def get_download_location_for_pub(link):
154
- ret = False
155
- new_link = ''
156
-
157
- # url format : https://pub.dev/packages/(oss_name)/versions/(oss_version)
158
- # download url format : https://pub.dev/packages/(oss_name)/versions/(oss_version).tar.gz
159
- try:
160
- if link.startswith('pub.dev/packages'):
161
- new_link = 'https://{link}.tar.gz'
162
- ret = True
163
-
164
- except Exception as error:
165
- ret = False
166
- logger.warning('Cannot find the link for pub (url:'+link+') '+str(error))
167
-
168
- return ret, new_link
File without changes