fosslight-util 2.1.27__tar.gz → 2.1.29__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/PKG-INFO +27 -2
  2. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/requirements.txt +1 -1
  3. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/setup.py +1 -1
  4. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/_get_downloadable_url.py +253 -73
  5. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/constant.py +2 -0
  6. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/download.py +78 -11
  7. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util.egg-info/PKG-INFO +28 -3
  8. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util.egg-info/SOURCES.txt +8 -1
  9. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util.egg-info/requires.txt +0 -1
  10. fosslight_util-2.1.29/tests/test_cyclonedx.py +20 -0
  11. fosslight_util-2.1.29/tests/test_download.py +140 -0
  12. fosslight_util-2.1.29/tests/test_opossum.py +20 -0
  13. fosslight_util-2.1.29/tests/test_spdx_licenses.py +12 -0
  14. fosslight_util-2.1.29/tests/test_text.py +21 -0
  15. fosslight_util-2.1.29/tests/test_write_output.py +25 -0
  16. fosslight_util-2.1.29/tests/test_write_yaml.py +20 -0
  17. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/LICENSE +0 -0
  18. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/MANIFEST.in +0 -0
  19. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/README.md +0 -0
  20. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/setup.cfg +0 -0
  21. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/__init__.py +0 -0
  22. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/compare_yaml.py +0 -0
  23. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/correct.py +0 -0
  24. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/cover.py +0 -0
  25. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/exclude.py +0 -0
  26. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/help.py +0 -0
  27. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/oss_item.py +0 -0
  28. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/output_format.py +0 -0
  29. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/parsing_yaml.py +0 -0
  30. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/read_excel.py +0 -0
  31. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/resources/frequentLicenselist.json +0 -0
  32. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/resources/frequent_license_nick_list.json +0 -0
  33. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/resources/licenses.json +0 -0
  34. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/set_log.py +0 -0
  35. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/spdx_licenses.py +0 -0
  36. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/timer_thread.py +0 -0
  37. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/write_cyclonedx.py +0 -0
  38. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/write_excel.py +0 -0
  39. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/write_opossum.py +0 -0
  40. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/write_scancodejson.py +0 -0
  41. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/write_spdx.py +0 -0
  42. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/write_txt.py +0 -0
  43. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util/write_yaml.py +0 -0
  44. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util.egg-info/dependency_links.txt +0 -0
  45. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util.egg-info/entry_points.txt +0 -0
  46. {fosslight_util-2.1.27 → fosslight_util-2.1.29}/src/fosslight_util.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: fosslight_util
3
- Version: 2.1.27
3
+ Version: 2.1.29
4
4
  Summary: FOSSLight Util
5
5
  Home-page: https://github.com/fosslight/fosslight_util
6
6
  Download-URL: https://github.com/fosslight/fosslight_util
@@ -13,6 +13,31 @@ Classifier: Programming Language :: Python :: 3.11
13
13
  Classifier: Programming Language :: Python :: 3.12
14
14
  Description-Content-Type: text/markdown
15
15
  License-File: LICENSE
16
+ Requires-Dist: XlsxWriter
17
+ Requires-Dist: pandas
18
+ Requires-Dist: openpyxl
19
+ Requires-Dist: progress
20
+ Requires-Dist: PyYAML
21
+ Requires-Dist: lastversion
22
+ Requires-Dist: coloredlogs
23
+ Requires-Dist: beautifulsoup4
24
+ Requires-Dist: jsonmerge
25
+ Requires-Dist: spdx-tools==0.8.*; sys_platform == "linux"
26
+ Requires-Dist: setuptools>=65.5.1
27
+ Requires-Dist: numpy
28
+ Requires-Dist: requests
29
+ Requires-Dist: GitPython
30
+ Requires-Dist: cyclonedx-python-lib==8.5.*; sys_platform == "linux"
31
+ Dynamic: author
32
+ Dynamic: classifier
33
+ Dynamic: description
34
+ Dynamic: description-content-type
35
+ Dynamic: download-url
36
+ Dynamic: home-page
37
+ Dynamic: license
38
+ Dynamic: license-file
39
+ Dynamic: requires-dist
40
+ Dynamic: summary
16
41
 
17
42
  <!--
18
43
  Copyright (c) 2021 LG Electronics
@@ -5,7 +5,7 @@ progress
5
5
  PyYAML
6
6
  lastversion
7
7
  coloredlogs
8
- python3-wget
8
+
9
9
  beautifulsoup4
10
10
  jsonmerge
11
11
  spdx-tools==0.8.*;sys_platform=="linux"
@@ -14,7 +14,7 @@ with open('requirements.txt', 'r', 'utf-8') as f:
14
14
  if __name__ == "__main__":
15
15
  setup(
16
16
  name='fosslight_util',
17
- version='2.1.27',
17
+ version='2.1.29',
18
18
  package_dir={"": "src"},
19
19
  packages=find_packages(where='src'),
20
20
  description='FOSSLight Util',
@@ -54,76 +54,185 @@ def extract_name_version_from_link(link, checkout_version):
54
54
  oss_name = ""
55
55
  oss_version = ""
56
56
  matched = False
57
+ direct_maven = False
58
+
57
59
  if link.startswith("www."):
58
60
  link = link.replace("www.", "https://www.", 1)
59
- for key, value in constant.PKG_PATTERN.items():
60
- p = re.compile(value)
61
- match = p.match(link)
62
- if match:
63
- try:
64
- origin_name = match.group(1)
65
- if (key == "pypi") or (key == "pypi2"):
66
- oss_name = f"pypi:{origin_name}"
67
- oss_name = re.sub(r"[-_.]+", "-", oss_name)
68
- oss_version = match.group(2)
69
- elif key == "maven":
70
- artifact = match.group(2)
71
- oss_name = f"{origin_name}:{artifact}"
72
- origin_name = oss_name
73
- oss_version = match.group(3)
74
- elif key == "npm" or key == "npm2":
75
- oss_name = f"npm:{origin_name}"
76
- oss_version = match.group(2)
77
- elif key == "pub":
78
- oss_name = f"pub:{origin_name}"
79
- oss_version = match.group(2)
80
- elif key == "cocoapods":
81
- oss_name = f"cocoapods:{origin_name}"
82
- elif key == "go":
83
- if origin_name.endswith('/'):
84
- origin_name = origin_name[:-1]
85
- oss_name = f"go:{origin_name}"
86
- oss_version = match.group(2)
87
- elif key == "cargo":
88
- oss_name = f"cargo:{origin_name}"
89
- oss_version = match.group(2)
90
- except Exception as ex:
91
- logger.info(f"extract_name_version_from_link {key}:{ex}")
92
- if oss_name:
93
- # Priority: 1) detected oss_version 2) checkout_version 3) latest
94
- need_latest = False
95
-
96
- if not oss_version and checkout_version:
97
- oss_version = checkout_version.strip()
98
- if key in ["pypi", "maven", "npm", "npm2", "pub", "go"]:
99
- if oss_version:
100
- try:
101
- if not version_exists(key, origin_name, oss_version):
102
- logger.info(f'Version {oss_version} not found for {oss_name}; will attempt latest fallback')
103
- need_latest = True
104
- except Exception as e:
105
- logger.info(f'Version validation failed ({oss_name}:{oss_version}) {e}; will attempt latest fallback')
106
- need_latest = True
107
- else:
108
- need_latest = True
109
- if need_latest:
110
- latest_ver = get_latest_package_version(link, key, origin_name)
111
- if latest_ver:
112
- if oss_version and latest_ver != oss_version:
113
- logger.info(f'Fallback to latest version {latest_ver} (previous invalid: {oss_version})')
114
- elif not oss_version:
115
- logger.info(f'Using latest version {latest_ver} (no version detected)')
116
- oss_version = latest_ver
117
- if oss_version:
118
- try:
119
- link = get_new_link_with_version(link, key, origin_name, oss_version)
120
- except Exception as _e:
121
- logger.info(f'Failed to build versioned link for {oss_name}:{oss_version} {_e}')
61
+
62
+ if (not matched and (
63
+ link.startswith('https://repo1.maven.org/maven2/') or
64
+ link.startswith('https://dl.google.com/android/maven2/')
65
+ )):
66
+ parsed = parse_direct_maven_url(link)
67
+ if parsed:
68
+ origin_name, parsed_version = parsed
69
+ oss_name = origin_name # groupId:artifactId
70
+ oss_version = parsed_version or ""
122
71
  matched = True
123
- break
72
+ direct_maven = True
73
+ pkg_type = 'maven'
74
+
75
+ for direct_key in ["maven_repo1", "maven_google"]:
76
+ pattern = constant.PKG_PATTERN.get(direct_key)
77
+ if pattern and re.match(pattern, link):
78
+ parsed = parse_direct_maven_url(link)
79
+ if parsed:
80
+ origin_name, parsed_version = parsed
81
+ oss_name = origin_name
82
+ oss_version = parsed_version or ""
83
+ matched = True
84
+ direct_maven = True
85
+ pkg_type = 'maven'
86
+ break
87
+
88
+ if not matched:
89
+ for key, value in constant.PKG_PATTERN.items():
90
+ if key in ["maven_repo1", "maven_google"]:
91
+ continue
92
+ p = re.compile(value)
93
+ match = p.match(link)
94
+ if match:
95
+ try:
96
+ pkg_type = key
97
+ origin_name = match.group(1)
98
+ if (key == "pypi") or (key == "pypi2"):
99
+ oss_name = f"pypi:{origin_name}"
100
+ oss_name = re.sub(r"[-_.]+", "-", oss_name)
101
+ oss_version = match.group(2)
102
+ pkg_type = 'pypi'
103
+ elif key == "maven":
104
+ artifact = match.group(2)
105
+ oss_name = f"{origin_name}:{artifact}"
106
+ origin_name = oss_name
107
+ oss_version = match.group(3)
108
+ elif key == "npm" or key == "npm2":
109
+ oss_name = f"npm:{origin_name}"
110
+ oss_version = match.group(2)
111
+ elif key == "pub":
112
+ oss_name = f"pub:{origin_name}"
113
+ oss_version = match.group(2)
114
+ elif key == "cocoapods":
115
+ oss_name = f"cocoapods:{origin_name}"
116
+ elif key == "go":
117
+ if origin_name.endswith('/'):
118
+ origin_name = origin_name[:-1]
119
+ oss_name = f"go:{origin_name}"
120
+ oss_version = match.group(2)
121
+ elif key == "cargo":
122
+ oss_name = f"cargo:{origin_name}"
123
+ oss_version = match.group(2)
124
+ except Exception as ex:
125
+ logger.info(f"extract_name_version_from_link {key}:{ex}")
126
+ if oss_name:
127
+ matched = True
128
+ break
129
+
124
130
  if not matched:
125
- key = ""
126
- return oss_name, oss_version, link, key
131
+ return "", "", link, ""
132
+ else:
133
+ need_latest = False
134
+ if not oss_version and checkout_version:
135
+ oss_version = checkout_version.strip()
136
+ if pkg_type in ["pypi", "maven", "npm", "npm2", "pub", "go"]:
137
+ if oss_version:
138
+ try:
139
+ if not version_exists(pkg_type, origin_name, oss_version):
140
+ logger.info(f'Version {oss_version} not found for {oss_name}; will attempt latest fallback')
141
+ need_latest = True
142
+ except Exception as e:
143
+ logger.info(f'Version validation failed ({oss_name}:{oss_version}) {e}; will attempt latest fallback')
144
+ need_latest = True
145
+ else:
146
+ need_latest = True
147
+ if need_latest:
148
+ latest_ver = get_latest_package_version(link, pkg_type, origin_name)
149
+ if latest_ver:
150
+ if oss_version and latest_ver != oss_version:
151
+ logger.info(f'Fallback to latest version {latest_ver} (previous invalid: {oss_version})')
152
+ elif not oss_version:
153
+ logger.info(f'Using latest version {latest_ver} (no version detected)')
154
+ oss_version = latest_ver
155
+
156
+ try:
157
+ if oss_version:
158
+ if pkg_type == 'maven' and direct_maven:
159
+ # Skip if oss_name malformed
160
+ if ':' in oss_name:
161
+ parts = oss_name.split(':', 1)
162
+ group_id, artifact_id = parts[0], parts[1]
163
+ group_path = group_id.replace('.', '/')
164
+ if (
165
+ link.startswith('https://repo1.maven.org/maven2/') or
166
+ link.startswith('http://repo1.maven.org/maven2/')
167
+ ):
168
+ if not re.search(r'/\d[^/]*/*$', link.rstrip('/')):
169
+ link = (
170
+ f'https://repo1.maven.org/maven2/{group_path}/'
171
+ f'{artifact_id}/{oss_version}'
172
+ )
173
+ elif (
174
+ link.startswith('https://dl.google.com/android/maven2/') or
175
+ link.startswith('http://dl.google.com/android/maven2/')
176
+ ):
177
+ if not re.search(r'/\d[^/]*/*$', link.rstrip('/')):
178
+ link = (
179
+ f'https://dl.google.com/android/maven2/{group_path}/'
180
+ f'{artifact_id}/{oss_version}/{artifact_id}-{oss_version}-sources.jar'
181
+ )
182
+ else:
183
+ logger.debug(f'Skip maven normalization due to invalid oss_name: {oss_name}')
184
+ else:
185
+ link = get_new_link_with_version(link, pkg_type, origin_name, oss_version)
186
+ except Exception as _e:
187
+ logger.info(f'Failed to build versioned link for {oss_name or origin_name}:{oss_version} {_e}')
188
+
189
+ return oss_name, oss_version, link, pkg_type
190
+
191
+
192
+ def parse_direct_maven_url(url):
193
+ try:
194
+ clean_url = url.replace('https://', '').replace('http://', '')
195
+ if clean_url.startswith('repo1.maven.org/maven2/'):
196
+ base_path = clean_url[len('repo1.maven.org/maven2/'):]
197
+ elif clean_url.startswith('dl.google.com/android/maven2/'):
198
+ base_path = clean_url[len('dl.google.com/android/maven2/'):]
199
+ else:
200
+ return None
201
+
202
+ base_path = base_path.rstrip('/')
203
+ # Strip file name if ends with known artifact extension.
204
+ if any(base_path.endswith(ext) for ext in ['.jar', '.pom', '.aar']):
205
+ base_path = '/'.join(base_path.split('/')[:-1])
206
+
207
+ parts = base_path.split('/')
208
+ if len(parts) < 2:
209
+ return None
210
+
211
+ version = None
212
+ artifact_id = None
213
+ if len(parts) >= 3:
214
+ potential_version = parts[-1]
215
+ potential_artifact = parts[-2]
216
+ if re.search(r'\d', potential_version):
217
+ version = potential_version
218
+ artifact_id = potential_artifact
219
+ group_parts = parts[:-2]
220
+ else:
221
+ artifact_id = parts[-1]
222
+ group_parts = parts[:-1]
223
+ else:
224
+ artifact_id = parts[-1]
225
+ group_parts = parts[:-1]
226
+
227
+ group_id = '.'.join(group_parts)
228
+ if not group_id or not artifact_id:
229
+ return None
230
+
231
+ maven_name = f"{group_id}:{artifact_id}"
232
+ return maven_name, version
233
+ except Exception as e:
234
+ logger.debug(f'Failed to parse direct Maven URL {url}: {e}')
235
+ return None
127
236
 
128
237
 
129
238
  def get_new_link_with_version(link, pkg_type, oss_name, oss_version):
@@ -160,7 +269,45 @@ def get_latest_package_version(link, pkg_type, oss_name):
160
269
  if maven_response.status_code == 200:
161
270
  versions = maven_response.json().get('versions', [])
162
271
  if versions:
163
- cand = max(versions, key=lambda v: v.get('publishedAt', ''))
272
+ # Some version entries may miss publishedAt; fallback to semantic version ordering.
273
+ def sem_key(vstr: str):
274
+ # Parse semantic version with optional prerelease label
275
+ # Examples: 1.9.0, 1.10.0-alpha, 2.0.0-rc
276
+ m = re.match(r'^(\d+)(?:\.(\d+))?(?:\.(\d+))?(?:[-.]([A-Za-z0-9]+))?$', vstr)
277
+ if not m:
278
+ return (0, 0, 0, 999)
279
+ major = int(m.group(1) or 0)
280
+ minor = int(m.group(2) or 0)
281
+ patch = int(m.group(3) or 0)
282
+ label = (m.group(4) or '').lower()
283
+ # Assign label weights: stable > rc > beta > alpha
284
+ label_weight_map = {
285
+ 'alpha': -3,
286
+ 'beta': -2,
287
+ 'rc': -1
288
+ }
289
+ weight = label_weight_map.get(label, 0 if label == '' else -4)
290
+ return (major, minor, patch, weight)
291
+
292
+ with_pub = [v for v in versions if v.get('publishedAt')]
293
+ if with_pub:
294
+ cand = max(with_pub, key=lambda v: v.get('publishedAt'))
295
+ else:
296
+ decorated = []
297
+ for v in versions:
298
+ vkey = v.get('versionKey', {})
299
+ ver = vkey.get('version', '')
300
+ if ver:
301
+ decorated.append((sem_key(ver), ver, v))
302
+ if decorated:
303
+ decorated.sort(key=lambda t: t[0])
304
+ stable_candidates = [t for t in decorated if t[0][3] == 0]
305
+ if stable_candidates:
306
+ cand = stable_candidates[-1][2]
307
+ else:
308
+ cand = decorated[-1][2]
309
+ else:
310
+ cand = versions[-1]
164
311
  find_version = cand.get('versionKey', {}).get('version', '')
165
312
  elif pkg_type == 'pub':
166
313
  pub_response = requests.get(f'https://pub.dev/api/packages/{oss_name}')
@@ -188,7 +335,7 @@ def get_downloadable_url(link, checkout_version):
188
335
 
189
336
  if pkg_type == "pypi":
190
337
  ret, result_link = get_download_location_for_pypi(new_link)
191
- elif pkg_type == "maven" or new_link.startswith('repo1.maven.org/'):
338
+ elif pkg_type == "maven" or new_link.startswith('repo1.maven.org/') or new_link.startswith('dl.google.com/android/maven2/'):
192
339
  ret, result_link = get_download_location_for_maven(new_link)
193
340
  elif (pkg_type in ["npm", "npm2"]) or new_link.startswith('registry.npmjs.org/'):
194
341
  ret, result_link = get_download_location_for_npm(new_link)
@@ -352,19 +499,52 @@ def get_download_location_for_maven(link):
352
499
 
353
500
  try:
354
501
  if link.startswith('mvnrepository.com/artifact/'):
355
- dn_loc_split = link.replace('mvnrepository.com/', '').split('/')
356
- group_id = dn_loc_split[1].replace('.', '/')
357
- dn_loc = 'https://repo1.maven.org/maven2/' + group_id + '/' + dn_loc_split[2] + '/' + dn_loc_split[3]
502
+ parts = link.replace('mvnrepository.com/artifact/', '').split('/')
503
+ if len(parts) < 2:
504
+ raise Exception('invalid mvnrepository artifact url')
505
+ group_raw = parts[0]
506
+ artifact_id = parts[1]
507
+ version = parts[2] if len(parts) > 2 and parts[2] else ''
508
+ group_path = group_raw.replace('.', '/')
509
+
510
+ repo_base = f'https://repo1.maven.org/maven2/{group_path}/{artifact_id}'
511
+ try:
512
+ urlopen(repo_base)
513
+ if version:
514
+ dn_loc = f'{repo_base}/{version}'
515
+ else:
516
+ new_link = repo_base
517
+ ret = True
518
+ return ret, new_link
519
+ except Exception:
520
+ google_base = f'https://dl.google.com/android/maven2/{group_path}/{artifact_id}'
521
+ if version:
522
+ google_sources = f'{google_base}/{version}/{artifact_id}-{version}-sources.jar'
523
+ try:
524
+ res_g = urlopen(google_sources)
525
+ if res_g.getcode() == 200:
526
+ ret = True
527
+ return ret, google_sources
528
+ except Exception:
529
+ pass
530
+ new_link = google_base
531
+ ret = True
532
+ return ret, new_link
358
533
 
359
534
  elif link.startswith('repo1.maven.org/maven2/'):
360
- dn_loc_split = link.replace('repo1.maven.org/maven2/', '').split('/')
361
-
362
535
  if link.endswith('.tar.gz') or link.endswith('.jar') or link.endswith('.tar.xz'):
363
536
  new_link = 'https://' + link
364
537
  ret = True
365
538
  return ret, new_link
366
539
  else:
367
540
  dn_loc = 'https://' + link
541
+ elif link.startswith('dl.google.com/android/maven2/'):
542
+ if link.endswith('.jar'):
543
+ new_link = 'https://' + link
544
+ ret = True
545
+ return ret, new_link
546
+ else:
547
+ dn_loc = 'https://' + link
368
548
  else:
369
549
  raise Exception("not valid url for maven")
370
550
 
@@ -40,6 +40,8 @@ PKG_PATTERN = {
40
40
  "pypi": r'https?:\/\/pypi\.org\/project\/([^\/]+)[\/]?([^\/]*)',
41
41
  "pypi2": r'https?:\/\/files\.pythonhosted\.org\/packages\/source\/[\w]\/([^\/]+)\/[\S]+-([^\-]+)\.tar\.gz',
42
42
  "maven": r'https?:\/\/mvnrepository\.com\/artifact\/([^\/]+)\/([^\/]+)\/?([^\/]*)',
43
+ "maven_repo1": r'https?:\/\/repo1\.maven\.org\/maven2\/(.*)',
44
+ "maven_google": r'https?:\/\/dl\.google\.com\/android\/maven2\/(.*)',
43
45
  "npm": r'https?:\/\/www\.npmjs\.com\/package\/([^\/\@]+)(?:\/v\/)?([^\/]*)',
44
46
  "npm2": r'https?:\/\/www\.npmjs\.com\/package\/(\@[^\/]+\/[^\/]+)(?:\/v\/)?([^\/]*)',
45
47
  "pub": r'https?:\/\/pub\.dev\/packages\/([^\/]+)(?:\/versions\/)?([^\/]*)',
@@ -4,7 +4,7 @@
4
4
  # SPDX-License-Identifier: Apache-2.0
5
5
  import os
6
6
  import sys
7
- import wget
7
+ import requests
8
8
  import tarfile
9
9
  import zipfile
10
10
  import logging
@@ -58,6 +58,22 @@ def alarm_handler(signum, frame):
58
58
  raise TimeOutException(f'Timeout ({SIGNAL_TIMEOUT} sec)', 1)
59
59
 
60
60
 
61
+ def is_downloadable(url):
62
+ try:
63
+ h = requests.head(url, allow_redirects=True)
64
+ header = h.headers
65
+ content_type = header.get('content-type')
66
+ if 'text/html' in content_type.lower():
67
+ return False
68
+ content_disposition = header.get('content-disposition')
69
+ if content_disposition and 'attachment' in content_disposition.lower():
70
+ return True
71
+ return True
72
+ except Exception as e:
73
+ logger.warning(f"is_downloadable - failed: {e}")
74
+ return False
75
+
76
+
61
77
  def change_src_link_to_https(src_link):
62
78
  src_link = src_link.replace("git://", "https://")
63
79
  if src_link.endswith(".git"):
@@ -381,25 +397,30 @@ def download_wget(link, target_dir, compressed_only, checkout_to):
381
397
  link = new_link
382
398
 
383
399
  if compressed_only:
400
+ # Check if link ends with known compression extensions
384
401
  for ext in compression_extension:
385
402
  if link.endswith(ext):
386
403
  success = True
387
404
  break
388
- if not success:
389
- if pkg_type == 'cargo':
390
- success = True
391
405
  else:
392
- success = True
406
+ # If get_downloadable_url found a downloadable file, proceed
407
+ if ret:
408
+ success = True
409
+ else:
410
+ # No downloadable file found in package repositories, verify link is downloadable
411
+ if not is_downloadable(link):
412
+ raise Exception('Not a downloadable link (link:{0})'.format(link))
413
+ success = True
393
414
 
415
+ # Fallback: verify link is downloadable for compressed_only case
394
416
  if not success:
395
- raise Exception('Not supported compression type (link:{0})'.format(link))
417
+ if is_downloadable(link):
418
+ success = True
419
+ else:
420
+ raise Exception('Not a downloadable link (link:{0})'.format(link))
396
421
 
397
422
  logger.info(f"wget: {link}")
398
- if pkg_type == 'cargo':
399
- outfile = os.path.join(target_dir, f'{oss_name}.tar.gz')
400
- downloaded_file = wget.download(link, out=outfile)
401
- else:
402
- downloaded_file = wget.download(link, target_dir)
423
+ downloaded_file = download_file(link, target_dir)
403
424
  if platform.system() != "Windows":
404
425
  signal.alarm(0)
405
426
  else:
@@ -416,6 +437,49 @@ def download_wget(link, target_dir, compressed_only, checkout_to):
416
437
  return success, downloaded_file, msg, oss_name, oss_version
417
438
 
418
439
 
440
+ def download_file(url, target_dir):
441
+ local_path = ""
442
+ try:
443
+ try:
444
+ h = requests.head(url, allow_redirects=True)
445
+ final_url = h.url or url
446
+ headers = h.headers
447
+ except Exception:
448
+ final_url = url
449
+ headers = {}
450
+
451
+ with requests.get(final_url, stream=True, allow_redirects=True) as r:
452
+ r.raise_for_status()
453
+
454
+ filename = ""
455
+ cd = r.headers.get("Content-Disposition") or headers.get("Content-Disposition")
456
+ if cd:
457
+ m_star = re.search(r"filename\*=(?:UTF-8'')?([^;\r\n]+)", cd)
458
+ if m_star:
459
+ filename = urllib.parse.unquote(m_star.group(1).strip('"\''))
460
+ else:
461
+ m = re.search(r"filename=([^;\r\n]+)", cd)
462
+ if m:
463
+ filename = m.group(1).strip('"\'')
464
+ if not filename:
465
+ final_for_name = r.url or final_url
466
+ filename = os.path.basename(urllib.parse.urlparse(final_for_name).path)
467
+ if not filename:
468
+ filename = "downloaded_file"
469
+ if os.path.isdir(target_dir):
470
+ local_path = os.path.join(target_dir, filename)
471
+ else:
472
+ local_path = target_dir
473
+
474
+ with open(local_path, 'wb') as f:
475
+ for chunk in r.iter_content(chunk_size=8192):
476
+ f.write(chunk)
477
+ except Exception as e:
478
+ logger.warning(f"download_file - failed: {e}")
479
+ return None
480
+ return local_path
481
+
482
+
419
483
  def extract_compressed_dir(src_dir, target_dir, remove_after_extract=True):
420
484
  logger.debug(f"Extract Dir: {src_dir}")
421
485
  try:
@@ -450,6 +514,9 @@ def extract_compressed_file(fname, extract_path, remove_after_extract=True, comp
450
514
  decompress_bz2(fname, extract_path)
451
515
  elif fname.endswith(".whl"):
452
516
  unzip(fname, extract_path)
517
+ elif fname.endswith(".crate"):
518
+ with contextlib.closing(tarfile.open(fname, "r:gz")) as t:
519
+ t.extractall(path=extract_path)
453
520
  else:
454
521
  is_compressed_file = False
455
522
  if compressed_only:
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
2
- Name: fosslight-util
3
- Version: 2.1.27
1
+ Metadata-Version: 2.4
2
+ Name: fosslight_util
3
+ Version: 2.1.29
4
4
  Summary: FOSSLight Util
5
5
  Home-page: https://github.com/fosslight/fosslight_util
6
6
  Download-URL: https://github.com/fosslight/fosslight_util
@@ -13,6 +13,31 @@ Classifier: Programming Language :: Python :: 3.11
13
13
  Classifier: Programming Language :: Python :: 3.12
14
14
  Description-Content-Type: text/markdown
15
15
  License-File: LICENSE
16
+ Requires-Dist: XlsxWriter
17
+ Requires-Dist: pandas
18
+ Requires-Dist: openpyxl
19
+ Requires-Dist: progress
20
+ Requires-Dist: PyYAML
21
+ Requires-Dist: lastversion
22
+ Requires-Dist: coloredlogs
23
+ Requires-Dist: beautifulsoup4
24
+ Requires-Dist: jsonmerge
25
+ Requires-Dist: spdx-tools==0.8.*; sys_platform == "linux"
26
+ Requires-Dist: setuptools>=65.5.1
27
+ Requires-Dist: numpy
28
+ Requires-Dist: requests
29
+ Requires-Dist: GitPython
30
+ Requires-Dist: cyclonedx-python-lib==8.5.*; sys_platform == "linux"
31
+ Dynamic: author
32
+ Dynamic: classifier
33
+ Dynamic: description
34
+ Dynamic: description-content-type
35
+ Dynamic: download-url
36
+ Dynamic: home-page
37
+ Dynamic: license
38
+ Dynamic: license-file
39
+ Dynamic: requires-dist
40
+ Dynamic: summary
16
41
 
17
42
  <!--
18
43
  Copyright (c) 2021 LG Electronics
@@ -34,4 +34,11 @@ src/fosslight_util.egg-info/requires.txt
34
34
  src/fosslight_util.egg-info/top_level.txt
35
35
  src/fosslight_util/resources/frequentLicenselist.json
36
36
  src/fosslight_util/resources/frequent_license_nick_list.json
37
- src/fosslight_util/resources/licenses.json
37
+ src/fosslight_util/resources/licenses.json
38
+ tests/test_cyclonedx.py
39
+ tests/test_download.py
40
+ tests/test_opossum.py
41
+ tests/test_spdx_licenses.py
42
+ tests/test_text.py
43
+ tests/test_write_output.py
44
+ tests/test_write_yaml.py
@@ -5,7 +5,6 @@ progress
5
5
  PyYAML
6
6
  lastversion
7
7
  coloredlogs
8
- python3-wget
9
8
  beautifulsoup4
10
9
  jsonmerge
11
10
  setuptools>=65.5.1
@@ -0,0 +1,20 @@
1
+ # Copyright (c) 2021 LG Electronics Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ import os
5
+
6
+ from fosslight_util.write_cyclonedx import write_cyclonedx
7
+ from tests import constants
8
+
9
+
10
+ def test_cyclonedx(scan_item):
11
+ # given
12
+ output_dir = os.path.join(constants.TEST_RESULT_DIR, "cyclonedx")
13
+ filename_with_dir = os.path.join(output_dir, "FL-TEST_cyclonedx.json")
14
+
15
+ # when
16
+ success, err_msg, _ = write_cyclonedx(filename_with_dir.split('.')[0], '.json', scan_item)
17
+
18
+ # then
19
+ assert success is True
20
+ assert len(os.listdir(output_dir)) > 0
@@ -0,0 +1,140 @@
1
+ # Copyright (c) 2021 LG Electronics Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ import os
5
+ import pytest
6
+
7
+ from fosslight_util.download import cli_download_and_extract, download_git_clone
8
+ from tests import constants
9
+
10
+
11
+ def test_download_from_github():
12
+ # given
13
+ git_url = "https://github.com/LGE-OSS/example"
14
+ target_dir = os.path.join(constants.TEST_RESULT_DIR, "download/example")
15
+ log_dir = "test_result/download_log/example"
16
+
17
+ # when
18
+ success, _, _, _ = cli_download_and_extract(git_url, target_dir, log_dir)
19
+
20
+ # then
21
+ assert success is True
22
+ assert len(os.listdir(target_dir)) > 0
23
+
24
+
25
+ @pytest.mark.parametrize("git_url",
26
+ ["git://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git;protocol=git;branch=hash-stat2",
27
+ "git://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git;protocol=git;tag=v32"])
28
+ def test_download_from_github_with_branch_or_tag(git_url):
29
+ # given
30
+ target_dir = os.path.join(constants.TEST_RESULT_DIR, "download/example")
31
+ log_dir = "test_result/download_log/example"
32
+
33
+ # when
34
+ success, _, _, _ = cli_download_and_extract(git_url, target_dir, log_dir)
35
+
36
+ # then
37
+ assert success is True
38
+ assert len(os.listdir(target_dir)) > 0
39
+
40
+
41
+ @pytest.mark.parametrize("project_name, project_url",
42
+ [("filelock", "https://pypi.org/project/filelock/3.4.1"),
43
+ ("dependency", "https://pypi.org/project/fosslight-dependency/3.0.5/"),
44
+ ("jackson", "https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind/2.12.2"),
45
+ ("pub", "https://pub.dev/packages/file/versions/5.2.1")])
46
+ def test_download_from_wget(project_name, project_url):
47
+ # given
48
+ target_dir = os.path.join(constants.TEST_RESULT_DIR,
49
+ os.path.join("download", project_name))
50
+ log_dir = os.path.join(constants.TEST_RESULT_DIR,
51
+ os.path.join("download_log" + project_name))
52
+
53
+ # when
54
+ success, _, _, _ = cli_download_and_extract(project_url, target_dir, log_dir)
55
+
56
+ # then
57
+ assert success is True
58
+ assert len(os.listdir(target_dir)) > 0
59
+
60
+
61
+ def test_download_git_clone_with_branch():
62
+ # given
63
+ git_url = "git://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git"
64
+ target_dir = os.path.join(constants.TEST_RESULT_DIR, "download/example")
65
+ branch_name = "hash-stat2"
66
+
67
+ # when
68
+ success, _, oss_name, oss_version = download_git_clone(git_url, target_dir, "", "", branch_name)
69
+
70
+ # then
71
+ assert success is True
72
+ assert len(os.listdir(target_dir)) > 0
73
+ assert oss_name == ''
74
+ assert oss_version == branch_name
75
+
76
+
77
+ def test_download_git_clone_with_tag():
78
+ # given
79
+ git_url = "git://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git"
80
+ target_dir = os.path.join(constants.TEST_RESULT_DIR, "download/example")
81
+ tag_name = "v32"
82
+
83
+ # when
84
+ success, _, oss_name, oss_version = download_git_clone(git_url, target_dir, "", tag_name)
85
+
86
+ # then
87
+ assert success is True
88
+ assert len(os.listdir(target_dir)) > 0
89
+ assert oss_name == ''
90
+ assert oss_version == tag_name
91
+
92
+
93
+ def test_download_main_branch_when_any_branch_or_tag_not_entered():
94
+ # given
95
+ git_url = "https://github.com/LGE-OSS/example"
96
+ target_dir = os.path.join(constants.TEST_RESULT_DIR, "download/example")
97
+ expected_oss_ver = ""
98
+
99
+ # when
100
+ success, _, oss_name, oss_version = download_git_clone(git_url, target_dir)
101
+
102
+ # then
103
+ assert success is True
104
+ assert len(os.listdir(target_dir)) > 0
105
+ assert oss_name == 'LGE-OSS-example'
106
+ assert oss_version == expected_oss_ver
107
+
108
+
109
+ def test_download_main_branch_when_non_existent_branch_entered():
110
+ # given
111
+ git_url = "https://github.com/LGE-OSS/example"
112
+ target_dir = os.path.join(constants.TEST_RESULT_DIR, "download/example")
113
+ branch_name = "non-existent-branch"
114
+ expected_oss_ver = ""
115
+
116
+ # when
117
+ success, _, oss_name, oss_version = download_git_clone(git_url, target_dir, "", "", branch_name)
118
+
119
+ # then
120
+ assert success is True
121
+ assert len(os.listdir(target_dir)) > 0
122
+ assert oss_name == 'LGE-OSS-example'
123
+ assert oss_version == expected_oss_ver
124
+
125
+
126
+ def test_download_main_branch_when_non_existent_tag_entered():
127
+ # given
128
+ git_url = "https://github.com/LGE-OSS/example"
129
+ target_dir = os.path.join(constants.TEST_RESULT_DIR, "download/example")
130
+ tag_name = "non-existent-tag"
131
+ expected_oss_ver = ""
132
+
133
+ # when
134
+ success, _, oss_name, oss_version = download_git_clone(git_url, target_dir, "", tag_name)
135
+
136
+ # then
137
+ assert success is True
138
+ assert len(os.listdir(target_dir)) > 0
139
+ assert oss_name == 'LGE-OSS-example'
140
+ assert oss_version == expected_oss_ver
@@ -0,0 +1,20 @@
1
+ # Copyright (c) 2021 LG Electronics Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ import os
5
+
6
+ from fosslight_util.write_opossum import write_opossum
7
+ from tests import constants
8
+
9
+
10
+ def test_opossum(scan_item):
11
+ # given
12
+ output_dir = os.path.join(constants.TEST_RESULT_DIR, "opossum")
13
+ filename_with_dir = os.path.join(output_dir, "FL-TEST_opossum.json")
14
+
15
+ # when
16
+ success, _ = write_opossum(filename_with_dir, scan_item)
17
+
18
+ # then
19
+ assert success is True
20
+ assert len(os.listdir(output_dir)) > 0
@@ -0,0 +1,12 @@
1
+ # Copyright (c) 2021 LG Electronics Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ from fosslight_util.spdx_licenses import get_spdx_licenses_json
4
+
5
+
6
+ def test_get_spdx_licenses_json():
7
+ # when
8
+ success, _, licenses = get_spdx_licenses_json()
9
+
10
+ # then
11
+ assert success is True
12
+ assert len(licenses) > 0
@@ -0,0 +1,21 @@
1
+ # Copyright (c) 2021 LG Electronics Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ import os
5
+ from fosslight_util.write_txt import write_txt_file
6
+
7
+
8
+ def test_text():
9
+ # given
10
+ output_dir = "test_result/txt"
11
+ file_to_create = os.path.join(output_dir, "test.txt")
12
+ text_to_write = "Testing - Writing text in a file."
13
+
14
+ # when
15
+ success, _ = write_txt_file(file_to_create, text_to_write)
16
+ with open(file_to_create, 'r', encoding='utf-8') as result_file:
17
+ result = result_file.read()
18
+
19
+ # then
20
+ assert success is True
21
+ assert text_to_write in result
@@ -0,0 +1,25 @@
1
+ # Copyright (c) 2021 LG Electronics Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ import os
4
+ from copy import deepcopy
5
+
6
+ import pytest
7
+
8
+ from fosslight_util.output_format import write_output_file
9
+
10
+
11
+ @pytest.mark.parametrize("output_dir, result_file_name, file_extension",
12
+ [("test_result/excel_and_csv/excel", "Test_Excel", ".xlsx"),
13
+ ("test_result/excel_and_csv/csv", "Test_Csv", ".csv"),
14
+ ("test_result/output_format", "FL-TEST_opossum", ".json")])
15
+ def test_write_excel_and_csv(output_dir, result_file_name, file_extension, scan_item):
16
+ # given
17
+ output_file_without_extension = os.path.join(output_dir, result_file_name)
18
+
19
+ # when
20
+ success, _, result_file = write_output_file(output_file_without_extension,
21
+ file_extension, deepcopy(scan_item))
22
+
23
+ # then
24
+ assert success is True
25
+ assert result_file_name + file_extension in result_file
@@ -0,0 +1,20 @@
1
+ # Copyright (c) 2021 LG Electronics Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ import os
5
+
6
+ from fosslight_util.write_yaml import write_yaml
7
+ from tests import constants
8
+
9
+
10
+ def test_write_yaml(scan_item):
11
+ # given
12
+ output_dir = os.path.join(constants.TEST_RESULT_DIR, "yaml")
13
+ output_file = os.path.join(output_dir, 'FL-TEST_yaml.yaml')
14
+
15
+ # when
16
+ success, _, output = write_yaml(output_file, scan_item)
17
+
18
+ # then
19
+ assert success is True
20
+ assert output_file in output
File without changes