fosslight-source 2.2.17__tar.gz → 2.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fosslight_source-2.2.17/src/fosslight_source.egg-info → fosslight_source-2.3.1}/PKG-INFO +2 -1
- {fosslight_source-2.2.17 → fosslight_source-2.3.1}/pyproject.toml +3 -1
- fosslight_source-2.3.1/src/fosslight_source/_kb_client.py +239 -0
- {fosslight_source-2.2.17 → fosslight_source-2.3.1}/src/fosslight_source/_parsing_scancode_file_item.py +1 -1
- {fosslight_source-2.2.17 → fosslight_source-2.3.1}/src/fosslight_source/_scan_item.py +24 -50
- {fosslight_source-2.2.17 → fosslight_source-2.3.1}/src/fosslight_source/cli.py +179 -86
- {fosslight_source-2.2.17 → fosslight_source-2.3.1}/src/fosslight_source/run_scancode.py +11 -2
- {fosslight_source-2.2.17 → fosslight_source-2.3.1/src/fosslight_source.egg-info}/PKG-INFO +2 -1
- {fosslight_source-2.2.17 → fosslight_source-2.3.1}/src/fosslight_source.egg-info/SOURCES.txt +1 -0
- {fosslight_source-2.2.17 → fosslight_source-2.3.1}/src/fosslight_source.egg-info/requires.txt +3 -0
- {fosslight_source-2.2.17 → fosslight_source-2.3.1}/tests/test_tox.py +65 -2
- {fosslight_source-2.2.17 → fosslight_source-2.3.1}/LICENSE +0 -0
- {fosslight_source-2.2.17 → fosslight_source-2.3.1}/MANIFEST.in +0 -0
- {fosslight_source-2.2.17 → fosslight_source-2.3.1}/README.md +0 -0
- {fosslight_source-2.2.17 → fosslight_source-2.3.1}/setup.cfg +0 -0
- {fosslight_source-2.2.17 → fosslight_source-2.3.1}/src/fosslight_source/__init__.py +0 -0
- {fosslight_source-2.2.17 → fosslight_source-2.3.1}/src/fosslight_source/_help.py +0 -0
- {fosslight_source-2.2.17 → fosslight_source-2.3.1}/src/fosslight_source/_license_matched.py +0 -0
- {fosslight_source-2.2.17 → fosslight_source-2.3.1}/src/fosslight_source/_parsing_scanoss_file.py +0 -0
- {fosslight_source-2.2.17 → fosslight_source-2.3.1}/src/fosslight_source/run_manifest_extractor.py +0 -0
- {fosslight_source-2.2.17 → fosslight_source-2.3.1}/src/fosslight_source/run_scanoss.py +0 -0
- {fosslight_source-2.2.17 → fosslight_source-2.3.1}/src/fosslight_source/run_spdx_extractor.py +0 -0
- {fosslight_source-2.2.17 → fosslight_source-2.3.1}/src/fosslight_source.egg-info/dependency_links.txt +0 -0
- {fosslight_source-2.2.17 → fosslight_source-2.3.1}/src/fosslight_source.egg-info/entry_points.txt +0 -0
- {fosslight_source-2.2.17 → fosslight_source-2.3.1}/src/fosslight_source.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fosslight_source
|
|
3
|
-
Version: 2.2.17
|
|
3
|
+
Version: 2.3.1
|
|
4
4
|
Summary: FOSSLight Source Scanner
|
|
5
5
|
Author: LG Electronics
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -26,6 +26,7 @@ Requires-Dist: wheel>=0.38.1
|
|
|
26
26
|
Requires-Dist: intbitset
|
|
27
27
|
Requires-Dist: fosslight_binary>=5.1.22
|
|
28
28
|
Requires-Dist: scancode-toolkit>=32.0.2
|
|
29
|
+
Requires-Dist: cryptography<49; platform_system == "Darwin" and platform_machine == "x86_64"
|
|
29
30
|
Requires-Dist: fingerprints==1.2.3
|
|
30
31
|
Requires-Dist: normality==2.6.1
|
|
31
32
|
Requires-Dist: psycopg2-binary>=2.9.10; python_version >= "3.13"
|
|
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
|
|
|
7
7
|
|
|
8
8
|
[project]
|
|
9
9
|
name = "fosslight_source"
|
|
10
|
-
version = "2.2.17"
|
|
10
|
+
version = "2.3.1"
|
|
11
11
|
description = "FOSSLight Source Scanner"
|
|
12
12
|
readme = "README.md"
|
|
13
13
|
license = "Apache-2.0"
|
|
@@ -35,6 +35,8 @@ dependencies = [
|
|
|
35
35
|
"intbitset",
|
|
36
36
|
"fosslight_binary>=5.1.22",
|
|
37
37
|
"scancode-toolkit>=32.0.2",
|
|
38
|
+
# cryptography 49.x does not provide macOS x86_64 wheels, causing source builds to require OpenSSL/pkg-config.
|
|
39
|
+
"cryptography<49; platform_system == 'Darwin' and platform_machine == 'x86_64'",
|
|
38
40
|
"fingerprints==1.2.3",
|
|
39
41
|
"normality==2.6.1",
|
|
40
42
|
# Python 3.13+ needs psycopg2-binary 2.9.10+ (has wheels; 2.9.9 builds fail with _PyInterpreterState_Get)
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Copyright (c) 2020 LG Electronics Inc.
|
|
4
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import logging
|
|
8
|
+
import time
|
|
9
|
+
import urllib.error
|
|
10
|
+
import urllib.request
|
|
11
|
+
from typing import Dict, List, NamedTuple, Optional
|
|
12
|
+
|
|
13
|
+
import fosslight_util.constant as constant
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(constant.LOGGER_NAME)
|
|
16
|
+
|
|
17
|
+
_SCAN_JOB_POLL_INTERVAL_SEC = 1.0
|
|
18
|
+
_SCAN_JOB_POLL_MAX_INTERVAL_SEC = 10.0
|
|
19
|
+
_SCAN_JOB_REQUEST_TIMEOUT_SEC = 30
|
|
20
|
+
_SCAN_JOB_MIN_WAIT_SEC = 300
|
|
21
|
+
_SCAN_JOB_PER_HASH_SEC = 35
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _kb_request(
    kb_url: str,
    path: str,
    *,
    method: str = "GET",
    payload: Optional[dict] = None,
    kb_token: str = "",
    timeout: int = _SCAN_JOB_REQUEST_TIMEOUT_SEC,
) -> dict:
    """
    Send one JSON request to the KB API and return the decoded JSON object.

    :param kb_url: KB API base URL (a trailing '/' is ignored).
    :param path: endpoint path relative to kb_url (a leading '/' is ignored).
    :param method: HTTP method.
    :param payload: optional body; serialized as JSON when given.
    :param kb_token: bearer token; the Authorization header is omitted when empty.
    :param timeout: timeout in seconds passed to urlopen.
    :return: decoded JSON object, or {} when the response body is empty.
    :raises urllib.error.URLError: propagated from urlopen (includes HTTPError).
    :raises ValueError: if the body is not valid JSON (json.JSONDecodeError) or
        is valid JSON that is not an object.
    """
    data = None
    if payload is not None:
        data = json.dumps(payload).encode("utf-8")

    url = f"{kb_url.rstrip('/')}/{path.lstrip('/')}"
    request = urllib.request.Request(url, data=data, method=method)
    request.add_header("Accept", "application/json")
    if payload is not None:
        request.add_header("Content-Type", "application/json")
    if kb_token:
        request.add_header("Authorization", f"Bearer {kb_token}")

    with urllib.request.urlopen(request, timeout=timeout) as response:
        body = response.read().decode("utf-8")
    if not body:
        return {}

    parsed = json.loads(body)
    # Honour the declared return type: a JSON list/string/number would make
    # callers fail later on `.get(...)`, outside their error handling.
    if not isinstance(parsed, dict):
        raise ValueError(f"unexpected KB response type: {type(parsed).__name__}")
    return parsed
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _estimate_job_wait_timeout(file_hash_count: int) -> float:
    """Return the polling budget in seconds: per-hash time, but never below the minimum wait."""
    scaled_wait = file_hash_count * _SCAN_JOB_PER_HASH_SEC
    if scaled_wait < _SCAN_JOB_MIN_WAIT_SEC:
        return float(_SCAN_JOB_MIN_WAIT_SEC)
    return float(scaled_wait)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _coerce_count(value, default: int) -> int:
|
|
53
|
+
if value is None:
|
|
54
|
+
return default
|
|
55
|
+
try:
|
|
56
|
+
count = int(value)
|
|
57
|
+
except (TypeError, ValueError):
|
|
58
|
+
return default
|
|
59
|
+
return count if count >= 0 else default
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _extract_response_message(response_body: dict) -> Optional[str]:
|
|
63
|
+
message = response_body.get("message")
|
|
64
|
+
if isinstance(message, str):
|
|
65
|
+
message = message.strip()
|
|
66
|
+
if message:
|
|
67
|
+
return message
|
|
68
|
+
return None
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _scan_job_failure_message(response_body: dict) -> Optional[str]:
    """Return the server message when a scan/jobs response looks like a failure, else None."""
    message = _extract_response_message(response_body)
    if not message:
        return None

    status = response_body.get("status")
    # A message counts as a failure when the status is absent or "failed",
    # or when the response carries no job_id.
    looks_failed = (
        status is None
        or str(status).lower() == "failed"
        or not response_body.get("job_id")
    )
    return message if looks_failed else None
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _parse_http_error_body(error: urllib.error.HTTPError) -> dict:
|
|
88
|
+
try:
|
|
89
|
+
raw = error.read().decode()
|
|
90
|
+
return json.loads(raw) if raw else {}
|
|
91
|
+
except (json.JSONDecodeError, UnicodeDecodeError, OSError):
|
|
92
|
+
return {}
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class KbScanJobResult(NamedTuple):
    """Outcome of one KB scan job lookup."""

    # file_hash -> origin URL for the hashes the KB matched
    origin_urls: Dict[str, str]
    # message describing why the job failed; None when no failure was reported
    failure_message: Optional[str]
    # number of unique file hashes submitted
    requested_count: int
    # number of entries in origin_urls
    returned_count: int
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _kb_scan_job_result(
    origin_urls: Dict[str, str],
    failure_message: Optional[str],
    requested_count: int,
) -> KbScanJobResult:
    """Build a KbScanJobResult, deriving returned_count from the size of origin_urls."""
    matched = len(origin_urls)
    return KbScanJobResult(origin_urls, failure_message, requested_count, matched)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def fetch_origin_urls_via_scan_job(
    file_hashes: List[str],
    kb_url: str,
    kb_token: str,
) -> KbScanJobResult:
    """
    Create a POST /scan/jobs request, poll until completion, and return a file_hash -> origin_url map.

    Empty hashes are dropped and duplicates removed (first occurrence kept) before submitting.
    Failures are logged as warnings and reported through the result rather than raised.

    :param file_hashes: list of MD5 file hashes to look up.
    :param kb_url: KB API base URL.
    :param kb_token: KB API bearer token.
    :return: origin URLs, optional failure message, and requested/returned file_hash counts.
    """
    unique_hashes = list(dict.fromkeys(h for h in file_hashes if h))
    requested_count = len(unique_hashes)
    if not unique_hashes:
        return _kb_scan_job_result({}, None, 0)

    try:
        created = _kb_request(
            kb_url,
            "scan/jobs",
            method="POST",
            payload={"file_hashes": unique_hashes},
            kb_token=kb_token,
        )
    except urllib.error.HTTPError as e:
        failure_message = _failure_message_from_http_error(e)
        if failure_message:
            logger.warning(f"KB scan job create failed: {failure_message}")
            return _kb_scan_job_result({}, failure_message, requested_count)
        logger.warning(f"KB scan job create failed: HTTP {e.code} {e.reason}")
        return _kb_scan_job_result({}, None, requested_count)
    except Exception as e:
        # URLError and every other failure are reported identically.
        logger.warning(f"KB scan job create failed: {e}")
        return _kb_scan_job_result({}, None, requested_count)

    # Everything below uses `.get(...)`; reject non-object responses up front.
    if not isinstance(created, dict):
        logger.warning("KB scan job create returned an unexpected response")
        return _kb_scan_job_result({}, None, requested_count)

    failure_message = _scan_job_failure_message(created)
    if failure_message:
        logger.warning(f"KB scan job create failed: {failure_message}")
        return _kb_scan_job_result({}, failure_message, requested_count)

    if str(created.get("status", "")).lower() == "failed":
        logger.warning("KB scan job create failed")
        return _kb_scan_job_result({}, None, requested_count)

    job_id = created.get("job_id", "")
    if not job_id:
        logger.warning("KB scan job create response missing job_id")
        return _kb_scan_job_result({}, None, requested_count)

    fallback_count = len(unique_hashes)
    # Prefer "accepted", then "total", then the number of hashes we sent.
    accepted = _coerce_count(
        created.get("accepted"),
        _coerce_count(created.get("total"), fallback_count),
    )
    skipped = _coerce_count(created.get("skipped"), 0)
    logger.info(
        f"KB scan job created: job_id={job_id}, total={created.get('total', fallback_count)}, "
        f"accepted={accepted}, skipped={skipped}"
    )
    if skipped:
        logger.warning(f"KB scan job rate-limited: {skipped} file_hash(es) skipped by server")
    if accepted == 0:
        failure_message = (
            f"rate-limited: {skipped} file_hash(es) skipped by server"
            if skipped
            else "scan job accepted no file_hashes"
        )
        return _kb_scan_job_result({}, failure_message, requested_count)

    deadline = time.monotonic() + _estimate_job_wait_timeout(accepted)
    interval = _SCAN_JOB_POLL_INTERVAL_SEC
    origin_urls: Dict[str, str] = {}

    while time.monotonic() < deadline:
        poll_error = ""
        try:
            status = _kb_request(kb_url, f"scan/jobs/{job_id}", kb_token=kb_token)
        except urllib.error.HTTPError as e:
            if e.code == 404:
                logger.warning(f"KB scan job not found: {job_id}")
                return _kb_scan_job_result(origin_urls, "scan job not found", requested_count)
            failure_message = _failure_message_from_http_error(e)
            if failure_message:
                logger.warning(f"KB scan job status failed: {failure_message}")
                return _kb_scan_job_result(origin_urls, failure_message, requested_count)
            poll_error = f"KB scan job status failed: HTTP {e.code}"
        except urllib.error.URLError as e:
            poll_error = f"KB scan job status failed: {e}"
        except Exception as e:
            poll_error = f"KB scan job status parse failed: {e}"
        else:
            if not isinstance(status, dict):
                poll_error = "KB scan job status parse failed: unexpected response type"

        if poll_error:
            # Transient failure: back off and poll again until the deadline.
            logger.warning(poll_error)
            interval = _sleep_with_backoff(interval, deadline)
            continue

        # Compare case-insensitively, matching the create-response check above.
        job_status = str(status.get("status", "")).lower()
        if job_status == "completed":
            results = status.get("results")
            # A missing or null "results" is treated as no matches.
            for row in results if isinstance(results, list) else []:
                if not isinstance(row, dict):
                    continue
                file_hash = row.get("file_hash", "")
                if row.get("success") and row.get("output") and file_hash:
                    origin_urls[file_hash] = row["output"]
            logger.info(
                f"KB scan job completed: job_id={job_id}, "
                f"matched={len(origin_urls)}, failed={status.get('failed', 0)}"
            )
            return _kb_scan_job_result(origin_urls, None, requested_count)

        if job_status == "failed":
            failure_message = _scan_job_failure_message(status)
            if failure_message:
                logger.warning(f"KB scan job failed: job_id={job_id}, message={failure_message}")
            else:
                logger.warning(f"KB scan job failed: job_id={job_id}")
            return _kb_scan_job_result(origin_urls, failure_message or "scan job failed", requested_count)

        interval = _sleep_with_backoff(interval, deadline)

    logger.warning(f"KB scan job timed out: job_id={job_id}")
    return _kb_scan_job_result(origin_urls, "scan job timed out", requested_count)


def _failure_message_from_http_error(error: urllib.error.HTTPError) -> Optional[str]:
    """Return the failure message carried by an HTTP error body, or None if there is none."""
    body = _parse_http_error_body(error)
    if not isinstance(body, dict):
        return None
    return _scan_job_failure_message(body)


def _sleep_with_backoff(interval: float, deadline: float) -> float:
    """Sleep for interval seconds, but not past deadline, and return the next (longer) interval."""
    remaining = deadline - time.monotonic()
    if remaining > 0:
        time.sleep(min(interval, remaining))
    return min(interval * 1.5, _SCAN_JOB_POLL_MAX_INTERVAL_SEC)
|
|
@@ -15,7 +15,7 @@ from typing import Tuple
|
|
|
15
15
|
|
|
16
16
|
logger = logging.getLogger(constant.LOGGER_NAME)
|
|
17
17
|
REMOVE_LICENSE = ["warranty-disclaimer"]
|
|
18
|
-
regex = re.compile(r'licenseref-(
|
|
18
|
+
regex = re.compile(r'licenseref-([a-z0-9\.\-]+)', re.IGNORECASE)
|
|
19
19
|
find_word = re.compile(rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)", re.IGNORECASE)
|
|
20
20
|
KEYWORD_SPDX_ID = r'SPDX-License-Identifier\s*[\S]+'
|
|
21
21
|
KEYWORD_DOWNLOAD_LOC = r'DownloadLocation\s*[\S]+'
|
|
@@ -6,11 +6,7 @@
|
|
|
6
6
|
import os
|
|
7
7
|
import logging
|
|
8
8
|
import re
|
|
9
|
-
import json
|
|
10
|
-
import base64
|
|
11
9
|
import hashlib
|
|
12
|
-
import urllib.request
|
|
13
|
-
import urllib.error
|
|
14
10
|
import fosslight_util.constant as constant
|
|
15
11
|
from fosslight_util.oss_item import FileItem, OssItem, get_checksum_sha1
|
|
16
12
|
|
|
@@ -63,8 +59,9 @@ class SourceItem(FileItem):
|
|
|
63
59
|
self.oss_version = ""
|
|
64
60
|
|
|
65
61
|
self.checksum = get_checksum_sha1(value)
|
|
66
|
-
self.kb_origin_url = "" # URL from OSS KB
|
|
62
|
+
self.kb_origin_url = "" # URL from OSS KB
|
|
67
63
|
self.kb_evidence = "" # Evidence from KB API (exact_match or code snippet)
|
|
64
|
+
self._cached_kb_md5 = "" # MD5 precomputed for KB lookup (set by _collect_kb_file_hashes)
|
|
68
65
|
|
|
69
66
|
def __del__(self) -> None:
|
|
70
67
|
pass
|
|
@@ -124,37 +121,18 @@ class SourceItem(FileItem):
|
|
|
124
121
|
logger.debug(f"Failed to compute MD5 for {self.source_name_or_path}: {e}")
|
|
125
122
|
return md5_hex, wfp
|
|
126
123
|
|
|
127
|
-
def
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
request.add_header('Content-Type', 'application/json')
|
|
140
|
-
if kb_token:
|
|
141
|
-
request.add_header('Authorization', f'Bearer {kb_token}')
|
|
142
|
-
|
|
143
|
-
with urllib.request.urlopen(request, timeout=10) as response:
|
|
144
|
-
data = json.loads(response.read().decode())
|
|
145
|
-
if isinstance(data, dict):
|
|
146
|
-
return_code = data.get('return_code', -1)
|
|
147
|
-
if return_code == 0:
|
|
148
|
-
output = data.get('output', '')
|
|
149
|
-
if output:
|
|
150
|
-
return output
|
|
151
|
-
except urllib.error.URLError as e:
|
|
152
|
-
logger.debug(f"Failed to fetch origin_url from API for MD5 hash {md5_hash}: {e}")
|
|
153
|
-
except json.JSONDecodeError as e:
|
|
154
|
-
logger.debug(f"Failed to parse API response for MD5 hash {md5_hash}: {e}")
|
|
155
|
-
except Exception as e:
|
|
156
|
-
logger.debug(f"Error getting origin_url for MD5 hash {md5_hash}: {e}")
|
|
157
|
-
return ""
|
|
124
|
+
def _apply_kb_origin_url(self, origin_url: str) -> tuple[str, str, str]:
|
|
125
|
+
"""Apply KB origin URL and return (oss_name, oss_version, download_url)."""
|
|
126
|
+
self.kb_origin_url = origin_url
|
|
127
|
+
self.kb_evidence = "exact_match"
|
|
128
|
+
extracted_name, extracted_version, repo_url = self._extract_oss_info_from_url(origin_url)
|
|
129
|
+
if extracted_name:
|
|
130
|
+
self.oss_name = extracted_name
|
|
131
|
+
if extracted_version:
|
|
132
|
+
self.oss_version = extracted_version
|
|
133
|
+
download_url = repo_url if repo_url else origin_url
|
|
134
|
+
self.download_location = [download_url]
|
|
135
|
+
return self.oss_name, self.oss_version, download_url
|
|
158
136
|
|
|
159
137
|
def _extract_oss_info_from_url(self, url: str) -> tuple:
|
|
160
138
|
"""
|
|
@@ -196,7 +174,9 @@ class SourceItem(FileItem):
|
|
|
196
174
|
return "", "", ""
|
|
197
175
|
|
|
198
176
|
def set_oss_item(
|
|
199
|
-
self,
|
|
177
|
+
self,
|
|
178
|
+
path_to_scan: str = "",
|
|
179
|
+
kb_origin_urls: dict[str, str] | None = None,
|
|
200
180
|
) -> None:
|
|
201
181
|
self.oss_items = []
|
|
202
182
|
if self.download_location:
|
|
@@ -207,21 +187,15 @@ class SourceItem(FileItem):
|
|
|
207
187
|
self.oss_items.append(item)
|
|
208
188
|
else:
|
|
209
189
|
item = OssItem(self.oss_name, self.oss_version, self.licenses)
|
|
210
|
-
if
|
|
211
|
-
md5_hash
|
|
190
|
+
if kb_origin_urls and not self.is_license_text:
|
|
191
|
+
md5_hash = self._cached_kb_md5
|
|
192
|
+
if not md5_hash:
|
|
193
|
+
md5_hash, _wfp = self._get_hash(path_to_scan)
|
|
212
194
|
if md5_hash:
|
|
213
|
-
origin_url =
|
|
195
|
+
origin_url = kb_origin_urls.get(md5_hash, "")
|
|
214
196
|
if origin_url:
|
|
215
|
-
|
|
216
|
-
self.
|
|
217
|
-
extracted_name, extracted_version, repo_url = self._extract_oss_info_from_url(origin_url)
|
|
218
|
-
if extracted_name:
|
|
219
|
-
self.oss_name = extracted_name
|
|
220
|
-
if extracted_version:
|
|
221
|
-
self.oss_version = extracted_version
|
|
222
|
-
download_url = repo_url if repo_url else origin_url
|
|
223
|
-
self.download_location = [download_url]
|
|
224
|
-
item = OssItem(self.oss_name, self.oss_version, self.licenses, download_url)
|
|
197
|
+
oss_name, oss_version, download_url = self._apply_kb_origin_url(origin_url)
|
|
198
|
+
item = OssItem(oss_name, oss_version, self.licenses, download_url)
|
|
225
199
|
|
|
226
200
|
item.copyright = "\n".join(self.copyright)
|
|
227
201
|
item.comment = self.comment
|
|
@@ -25,12 +25,14 @@ from fosslight_util.exclude import get_excluded_paths
|
|
|
25
25
|
from .run_scanoss import run_scanoss_py
|
|
26
26
|
from .run_scanoss import get_scanoss_extra_info
|
|
27
27
|
import yaml
|
|
28
|
+
import tqdm
|
|
28
29
|
import argparse
|
|
29
30
|
from .run_spdx_extractor import get_spdx_downloads
|
|
30
31
|
from .run_manifest_extractor import get_manifest_licenses
|
|
31
|
-
from ._scan_item import SourceItem, resolve_kb_config
|
|
32
|
+
from ._scan_item import SourceItem, resolve_kb_config, is_notice_file
|
|
33
|
+
from ._kb_client import fetch_origin_urls_via_scan_job
|
|
32
34
|
from fosslight_util.oss_item import ScannerItem
|
|
33
|
-
from typing import Tuple
|
|
35
|
+
from typing import Optional, Tuple
|
|
34
36
|
from ._scan_item import is_manifest_file
|
|
35
37
|
import shutil
|
|
36
38
|
|
|
@@ -330,11 +332,57 @@ def mark_oss_info_correction_files_as_excluded(scan_results: list) -> None:
|
|
|
330
332
|
item.comment = OSS_INFO_CORRECTION_COMMENT
|
|
331
333
|
|
|
332
334
|
|
|
335
|
+
def _collect_kb_file_hashes(
    scancode_result: list,
    path_to_scan: str,
    excluded_files: set,
    hide_progress: bool,
) -> tuple[list[str], list[tuple[SourceItem, str]]]:
    """Gather MD5 hashes to send to the KB, plus (extra_item, md5) pairs for files found only by walking.

    Items in scancode_result are skipped when they are license text, notice
    files, or already have a download_location. Walked files are skipped when
    they are already in scancode_result, listed in excluded_files, or notice files.
    Each hashed item gets the hash stored in its _cached_kb_md5 attribute.
    """
    hashes: list[str] = []
    candidates: list[tuple[SourceItem, str]] = []

    for scanned in scancode_result:
        if (
            scanned.is_license_text
            or is_notice_file(scanned.source_name_or_path)
            or scanned.download_location
        ):
            continue
        digest, _ = scanned._get_hash(path_to_scan)
        if digest:
            scanned._cached_kb_md5 = digest
            hashes.append(digest)

    scan_root = os.path.abspath(path_to_scan)
    known_paths = {scanned.source_name_or_path for scanned in scancode_result}

    walked_files = [
        os.path.join(dirpath, filename)
        for dirpath, _dirnames, filenames in os.walk(path_to_scan)
        for filename in filenames
    ]

    for walked in tqdm.tqdm(walked_files, desc="KB Hashing", disable=hide_progress):
        # Relative, forward-slash path for comparison with known_paths / excluded_files.
        rel_path = os.path.relpath(walked, scan_root).replace("\\", "/")
        if rel_path in known_paths or rel_path in excluded_files or is_notice_file(walked):
            continue
        extra_item = SourceItem(rel_path)
        digest, _ = extra_item._get_hash(path_to_scan)
        if digest:
            extra_item._cached_kb_md5 = digest
            hashes.append(digest)
            candidates.append((extra_item, digest))

    return hashes, candidates
|
|
379
|
+
|
|
380
|
+
|
|
333
381
|
def merge_results(
|
|
334
382
|
scancode_result: list = [], scanoss_result: list = [], spdx_downloads: dict = {},
|
|
335
383
|
path_to_scan: str = "", run_kb: bool = False, manifest_licenses: dict = {},
|
|
336
384
|
excluded_files: set = None, hide_progress: bool = False, kb_url: str = "", kb_token: str = ""
|
|
337
|
-
) -> list:
|
|
385
|
+
) -> tuple[list, Optional[str], int, int]:
|
|
338
386
|
|
|
339
387
|
"""
|
|
340
388
|
Merge scanner results and spdx parsing result.
|
|
@@ -346,7 +394,7 @@ def merge_results(
|
|
|
346
394
|
:param excluded_files: set of relative paths to exclude from KB-only file discovery.
|
|
347
395
|
:param kb_url: KB API base URL.
|
|
348
396
|
:param kb_token: KB API bearer token.
|
|
349
|
-
:return merged_result
|
|
397
|
+
:return: (merged_result, kb failure message, requested file_hash count, returned match count).
|
|
350
398
|
"""
|
|
351
399
|
if excluded_files is None:
|
|
352
400
|
excluded_files = set()
|
|
@@ -381,32 +429,60 @@ def merge_results(
|
|
|
381
429
|
new_result_item.is_manifest_file = True
|
|
382
430
|
scancode_result.append(new_result_item)
|
|
383
431
|
|
|
432
|
+
kb_origin_urls: dict[str, str] = {}
|
|
433
|
+
kb_status_message: Optional[str] = None
|
|
434
|
+
kb_requested_count = 0
|
|
435
|
+
kb_returned_count = 0
|
|
436
|
+
extra_candidates: list[tuple[SourceItem, str]] = []
|
|
437
|
+
if run_kb:
|
|
438
|
+
file_hashes, extra_candidates = _collect_kb_file_hashes(
|
|
439
|
+
scancode_result, path_to_scan, excluded_files, hide_progress
|
|
440
|
+
)
|
|
441
|
+
if file_hashes:
|
|
442
|
+
kb_result = fetch_origin_urls_via_scan_job(file_hashes, kb_url, kb_token)
|
|
443
|
+
kb_origin_urls = kb_result.origin_urls
|
|
444
|
+
kb_status_message = kb_result.failure_message
|
|
445
|
+
kb_requested_count = kb_result.requested_count
|
|
446
|
+
kb_returned_count = kb_result.returned_count
|
|
447
|
+
|
|
384
448
|
for item in scancode_result:
|
|
385
|
-
item.set_oss_item(path_to_scan,
|
|
449
|
+
item.set_oss_item(path_to_scan, kb_origin_urls=kb_origin_urls)
|
|
386
450
|
|
|
387
451
|
# Add OSSItem for files in path_to_scan that are not in scancode_result
|
|
388
452
|
# when KB returns an origin URL for their MD5 hash (skip excluded_files)
|
|
389
453
|
if run_kb:
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
scancode_paths = {item.source_name_or_path for item in scancode_result}
|
|
393
|
-
|
|
394
|
-
files_to_scan = []
|
|
395
|
-
for root, _dirs, files in os.walk(path_to_scan):
|
|
396
|
-
for file in files:
|
|
397
|
-
files_to_scan.append(os.path.join(root, file))
|
|
398
|
-
|
|
399
|
-
for file_path in tqdm.tqdm(files_to_scan, desc="KB Scanning", disable=hide_progress):
|
|
400
|
-
rel_path = os.path.relpath(file_path, abs_path_to_scan).replace("\\", "/")
|
|
401
|
-
if rel_path in scancode_paths or rel_path in excluded_files:
|
|
402
|
-
continue
|
|
403
|
-
extra_item = SourceItem(rel_path)
|
|
404
|
-
extra_item.set_oss_item(path_to_scan, run_kb, kb_url, kb_token)
|
|
454
|
+
for extra_item, _md5_hash in extra_candidates:
|
|
455
|
+
extra_item.set_oss_item(path_to_scan, kb_origin_urls=kb_origin_urls)
|
|
405
456
|
if extra_item.download_location:
|
|
406
457
|
scancode_result.append(extra_item)
|
|
407
|
-
scancode_paths.add(rel_path)
|
|
408
458
|
|
|
409
|
-
return scancode_result
|
|
459
|
+
return scancode_result, kb_status_message, kb_requested_count, kb_returned_count
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
def _finalize_temp_output(
|
|
463
|
+
temp_output_path: str,
|
|
464
|
+
final_output_path: str,
|
|
465
|
+
publish: bool,
|
|
466
|
+
log: Optional[logging.Logger] = None,
|
|
467
|
+
) -> bool:
|
|
468
|
+
"""Copy scan artifacts from temp dir, then always remove the temp directory."""
|
|
469
|
+
if not temp_output_path or not os.path.isdir(temp_output_path):
|
|
470
|
+
return True
|
|
471
|
+
publish_ok = True
|
|
472
|
+
try:
|
|
473
|
+
if publish:
|
|
474
|
+
shutil.copytree(temp_output_path, final_output_path, dirs_exist_ok=True)
|
|
475
|
+
except Exception as ex:
|
|
476
|
+
publish_ok = False
|
|
477
|
+
if log:
|
|
478
|
+
log.error(f"Failed to publish scan artifacts: {ex}")
|
|
479
|
+
finally:
|
|
480
|
+
try:
|
|
481
|
+
shutil.rmtree(temp_output_path)
|
|
482
|
+
except Exception as ex:
|
|
483
|
+
if log:
|
|
484
|
+
log.debug(f"Failed to cleanup temp output directory: {ex}")
|
|
485
|
+
return publish_ok
|
|
410
486
|
|
|
411
487
|
|
|
412
488
|
def run_scanners(
|
|
@@ -454,77 +530,94 @@ def run_scanners(
|
|
|
454
530
|
output_path = os.getcwd()
|
|
455
531
|
final_output_path = output_path
|
|
456
532
|
output_path = os.path.join(os.path.dirname(output_path), f'.fosslight_temp_{start_time}')
|
|
533
|
+
publish_temp_output = False
|
|
534
|
+
logger = None
|
|
535
|
+
publish_ok = True
|
|
457
536
|
|
|
458
|
-
|
|
459
|
-
|
|
537
|
+
try:
|
|
538
|
+
logger, result_log = init_log(os.path.join(output_path, f"fosslight_log_src_{start_time}.txt"),
|
|
539
|
+
True, logging.INFO, logging.DEBUG, PKG_NAME, path_to_scan, path_to_exclude)
|
|
460
540
|
|
|
461
|
-
|
|
541
|
+
logger.info(f"Tool Info : {result_log['Tool Info']}")
|
|
462
542
|
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
543
|
+
if '.xlsx' not in output_extensions and print_matched_text:
|
|
544
|
+
logger.warning("-m option is only available for excel.")
|
|
545
|
+
print_matched_text = False
|
|
466
546
|
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
547
|
+
if success:
|
|
548
|
+
if all_exclude_mode and len(all_exclude_mode) == 4:
|
|
549
|
+
(excluded_path_with_default_exclusion,
|
|
550
|
+
excluded_path_without_dot,
|
|
551
|
+
excluded_files,
|
|
552
|
+
cnt_file_except_skipped) = all_exclude_mode
|
|
553
|
+
else:
|
|
554
|
+
path_to_exclude_with_filename = path_to_exclude
|
|
555
|
+
(excluded_path_with_default_exclusion,
|
|
556
|
+
excluded_path_without_dot,
|
|
557
|
+
excluded_files,
|
|
558
|
+
cnt_file_except_skipped) = get_excluded_paths(path_to_scan, path_to_exclude_with_filename)
|
|
559
|
+
logger.debug(f"Skipped paths: {excluded_path_with_default_exclusion}")
|
|
560
|
+
|
|
561
|
+
if not selected_scanner:
|
|
562
|
+
selected_scanner = ALL_MODE
|
|
563
|
+
if selected_scanner in ['scancode', ALL_MODE]:
|
|
564
|
+
success, result_log[RESULT_KEY], scancode_result, license_list = run_scan(
|
|
565
|
+
path_to_scan, output_file_name, write_json_file, num_cores, True,
|
|
566
|
+
print_matched_text, formats, called_by_cli, time_out, correct_mode,
|
|
567
|
+
correct_filepath, excluded_path_with_default_exclusion,
|
|
568
|
+
excluded_files, hide_progress,
|
|
569
|
+
)
|
|
570
|
+
excluded_files = set(excluded_files) if excluded_files else set()
|
|
571
|
+
if selected_scanner in ['scanoss', ALL_MODE]:
|
|
572
|
+
scanoss_result, api_limit_exceed = run_scanoss_py(path_to_scan, output_path, formats, True, num_cores,
|
|
573
|
+
excluded_path_with_default_exclusion, excluded_files,
|
|
574
|
+
write_json_file, hide_progress)
|
|
575
|
+
|
|
576
|
+
run_kb_msg = ""
|
|
577
|
+
if selected_scanner in SCANNER_TYPE:
|
|
578
|
+
run_kb = True if selected_scanner in ['kb', ALL_MODE] else False
|
|
579
|
+
if run_kb:
|
|
580
|
+
if not check_kb_server_reachable(kb_url, kb_token):
|
|
581
|
+
run_kb = False
|
|
582
|
+
run_kb_msg = f"KB({kb_url}) Unreachable"
|
|
583
|
+
|
|
584
|
+
spdx_downloads, manifest_licenses = metadata_collector(path_to_scan, excluded_files)
|
|
585
|
+
merged_result, kb_status_message, kb_requested_count, kb_returned_count = merge_results(
|
|
586
|
+
scancode_result, scanoss_result, spdx_downloads,
|
|
587
|
+
path_to_scan, run_kb, manifest_licenses, excluded_files,
|
|
588
|
+
hide_progress, kb_url, kb_token,
|
|
589
|
+
)
|
|
590
|
+
if kb_status_message:
|
|
591
|
+
run_kb_msg = f"KB({kb_url}) {kb_status_message}"
|
|
592
|
+
elif run_kb and kb_requested_count > 0:
|
|
593
|
+
run_kb_msg = (
|
|
594
|
+
f"KB({kb_url}) response : {kb_returned_count}/"
|
|
595
|
+
f" requested: {kb_requested_count}"
|
|
596
|
+
)
|
|
597
|
+
mark_oss_info_correction_files_as_excluded(merged_result)
|
|
598
|
+
scan_item = create_report_file(start_time, merged_result, license_list, scanoss_result, selected_scanner,
|
|
599
|
+
print_matched_text, output_path, output_files, output_extensions, correct_mode,
|
|
600
|
+
correct_filepath, path_to_scan, excluded_path_without_dot, formats,
|
|
601
|
+
api_limit_exceed, cnt_file_except_skipped, final_output_path, run_kb_msg)
|
|
602
|
+
else:
|
|
603
|
+
print_help_msg_source_scanner()
|
|
604
|
+
result_log[RESULT_KEY] = "Unsupported scanner"
|
|
605
|
+
success = False
|
|
515
606
|
else:
|
|
516
|
-
|
|
517
|
-
result_log[RESULT_KEY] = "Unsupported scanner"
|
|
607
|
+
result_log[RESULT_KEY] = f"Format error. {msg}"
|
|
518
608
|
success = False
|
|
519
|
-
else:
|
|
520
|
-
result_log[RESULT_KEY] = f"Format error. {msg}"
|
|
521
|
-
success = False
|
|
522
609
|
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
610
|
+
publish_temp_output = True
|
|
611
|
+
finally:
|
|
612
|
+
publish_ok = _finalize_temp_output(output_path, final_output_path, publish_temp_output, logger)
|
|
613
|
+
|
|
614
|
+
if publish_temp_output and not publish_ok:
|
|
615
|
+
success = False
|
|
616
|
+
prev_msg = result_log.get(RESULT_KEY, "")
|
|
617
|
+
result_log[RESULT_KEY] = (
|
|
618
|
+
f"{prev_msg}, Failed to publish scan artifacts" if prev_msg
|
|
619
|
+
else "Failed to publish scan artifacts"
|
|
620
|
+
)
|
|
528
621
|
|
|
529
622
|
return success, result_log.get(RESULT_KEY, ""), scan_item, license_list, scanoss_result
|
|
530
623
|
|
|
@@ -63,14 +63,23 @@ def _apply_scancode_unset_workaround(kwargs: dict) -> None:
|
|
|
63
63
|
logger.debug("scancode UNSET workaround skipped: %s", ex)
|
|
64
64
|
|
|
65
65
|
|
|
66
|
+
def _directory_ignore_pattern(dir_name: str) -> str:
|
|
67
|
+
"""Path-based glob for a directory name (avoids matching the scan root itself)."""
|
|
68
|
+
normalized = dir_name.strip().strip("/").replace("\\", "/")
|
|
69
|
+
if not normalized:
|
|
70
|
+
return dir_name
|
|
71
|
+
return f"**/{normalized}/**"
|
|
72
|
+
|
|
73
|
+
|
|
66
74
|
def _default_scancode_coarse_ignore_patterns() -> frozenset:
|
|
67
75
|
"""
|
|
68
76
|
Coarse ignore patterns aligned with fosslight_util.get_excluded_paths() rules.
|
|
69
|
-
|
|
77
|
+
Directory names use path-based globs (e.g. **/tests/**) so they do not match
|
|
78
|
+
the scan root directory name itself.
|
|
70
79
|
"""
|
|
71
80
|
patterns = {".*"}
|
|
72
81
|
for name in PACKAGE_DIRECTORY + EXCLUDE_DIRECTORY:
|
|
73
|
-
patterns.add(name)
|
|
82
|
+
patterns.add(_directory_ignore_pattern(name))
|
|
74
83
|
for ext in EXCLUDE_FILE_EXTENSION:
|
|
75
84
|
patterns.add(f"*.{ext}")
|
|
76
85
|
for name in EXCLUDE_FILENAME:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fosslight_source
|
|
3
|
-
Version: 2.2.17
|
|
3
|
+
Version: 2.3.1
|
|
4
4
|
Summary: FOSSLight Source Scanner
|
|
5
5
|
Author: LG Electronics
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -26,6 +26,7 @@ Requires-Dist: wheel>=0.38.1
|
|
|
26
26
|
Requires-Dist: intbitset
|
|
27
27
|
Requires-Dist: fosslight_binary>=5.1.22
|
|
28
28
|
Requires-Dist: scancode-toolkit>=32.0.2
|
|
29
|
+
Requires-Dist: cryptography<49; platform_system == "Darwin" and platform_machine == "x86_64"
|
|
29
30
|
Requires-Dist: fingerprints==1.2.3
|
|
30
31
|
Requires-Dist: normality==2.6.1
|
|
31
32
|
Requires-Dist: psycopg2-binary>=2.9.10; python_version >= "3.13"
|
{fosslight_source-2.2.17 → fosslight_source-2.3.1}/src/fosslight_source.egg-info/SOURCES.txt
RENAMED
|
@@ -4,6 +4,7 @@ README.md
|
|
|
4
4
|
pyproject.toml
|
|
5
5
|
src/fosslight_source/__init__.py
|
|
6
6
|
src/fosslight_source/_help.py
|
|
7
|
+
src/fosslight_source/_kb_client.py
|
|
7
8
|
src/fosslight_source/_license_matched.py
|
|
8
9
|
src/fosslight_source/_parsing_scancode_file_item.py
|
|
9
10
|
src/fosslight_source/_parsing_scanoss_file.py
|
|
@@ -3,10 +3,13 @@
|
|
|
3
3
|
# Copyright (c) 2020 LG Electronics Inc.
|
|
4
4
|
# SPDX-License-Identifier: Apache-2.0
|
|
5
5
|
import os
|
|
6
|
+
import shlex
|
|
6
7
|
import subprocess
|
|
7
8
|
import pytest
|
|
8
9
|
import shutil
|
|
9
10
|
import sys
|
|
11
|
+
import csv
|
|
12
|
+
import glob
|
|
10
13
|
|
|
11
14
|
# Add project root to sys.path for importing FL Source modules
|
|
12
15
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
|
|
@@ -18,6 +21,26 @@ from fosslight_source._parsing_scancode_file_item import (
|
|
|
18
21
|
)
|
|
19
22
|
|
|
20
23
|
remove_directories = ["test_scan", "test_scan2", "test_scan3"]
|
|
24
|
+
TEST_FILES_SCAN_DIR = "test_scan"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _parse_license_tokens(license_value: str) -> set[str]:
|
|
28
|
+
return {token.strip().lower() for token in (license_value or "").split(",") if token.strip()}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _read_src_csv_rows(csv_path: str) -> list[dict]:
|
|
32
|
+
with open(csv_path, "r", encoding="utf-8") as file:
|
|
33
|
+
return list(csv.DictReader(file, delimiter="\t"))
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _rows_for_source(rows: list[dict], source_name: str) -> list[dict]:
|
|
37
|
+
return [row for row in rows if row.get("Source Path") == source_name]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _find_scan_csv(output_dir: str) -> str:
|
|
41
|
+
csv_files = sorted(glob.glob(os.path.join(output_dir, "*.csv")))
|
|
42
|
+
assert csv_files, f"No CSV report found under {output_dir}"
|
|
43
|
+
return csv_files[-1]
|
|
21
44
|
|
|
22
45
|
|
|
23
46
|
@pytest.fixture(scope="module", autouse=True)
|
|
@@ -31,8 +54,22 @@ def setup_test_result_dir():
|
|
|
31
54
|
|
|
32
55
|
|
|
33
56
|
def run_command(command):
|
|
34
|
-
|
|
35
|
-
|
|
57
|
+
command = command.strip()
|
|
58
|
+
if command.startswith("fosslight_source"):
|
|
59
|
+
args = shlex.split(command, posix=(os.name != "nt"))[1:]
|
|
60
|
+
if os.environ.get("FOSSLIGHT_USE_LOCAL_SRC"):
|
|
61
|
+
src_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src"))
|
|
62
|
+
env = os.environ.copy()
|
|
63
|
+
existing = env.get("PYTHONPATH", "")
|
|
64
|
+
env["PYTHONPATH"] = src_path if not existing else f"{src_path}{os.pathsep}{existing}"
|
|
65
|
+
cmd = [sys.executable, "-m", "fosslight_source.cli", *args]
|
|
66
|
+
process = subprocess.run(cmd, capture_output=True, text=True, env=env)
|
|
67
|
+
else:
|
|
68
|
+
cmd = ["fosslight_source", *args]
|
|
69
|
+
process = subprocess.run(cmd, capture_output=True, text=True)
|
|
70
|
+
else:
|
|
71
|
+
process = subprocess.run(command, shell=True, capture_output=True, text=True)
|
|
72
|
+
success = process.returncode == 0
|
|
36
73
|
return success, process.stdout if success else process.stderr
|
|
37
74
|
|
|
38
75
|
|
|
@@ -112,6 +149,32 @@ def test_run():
|
|
|
112
149
|
assert len(scan2_files) > 0, "Test Run: No scan files created in test_scan2 directory"
|
|
113
150
|
|
|
114
151
|
|
|
152
|
+
def test_test_files_scan_results():
|
|
153
|
+
os.makedirs(TEST_FILES_SCAN_DIR, exist_ok=True)
|
|
154
|
+
|
|
155
|
+
success, msg = run_command(
|
|
156
|
+
f"fosslight_source -p tests/test_files -s scancode -f csv -o {TEST_FILES_SCAN_DIR}/"
|
|
157
|
+
)
|
|
158
|
+
assert success is True, f"Test Run: test_files scan failed: {msg}"
|
|
159
|
+
|
|
160
|
+
csv_path = _find_scan_csv(TEST_FILES_SCAN_DIR)
|
|
161
|
+
rows = _read_src_csv_rows(csv_path)
|
|
162
|
+
|
|
163
|
+
sample_rows = _rows_for_source(rows, "sample.cpp")
|
|
164
|
+
assert sample_rows, "Test Run: sample.cpp not found in scan result"
|
|
165
|
+
for row in sample_rows:
|
|
166
|
+
licenses = _parse_license_tokens(row.get("License", ""))
|
|
167
|
+
assert "apache-2.0" in licenses, f"sample.cpp missing Apache-2.0 license: {row.get('License')}"
|
|
168
|
+
assert "mit" in licenses, f"sample.cpp missing MIT license: {row.get('License')}"
|
|
169
|
+
|
|
170
|
+
temp_rows = _rows_for_source(rows, "temp.cpp")
|
|
171
|
+
assert temp_rows, "Test Run: temp.cpp not found in scan result"
|
|
172
|
+
temp_row = temp_rows[0]
|
|
173
|
+
temp_licenses = _parse_license_tokens(temp_row.get("License", ""))
|
|
174
|
+
assert "apache-2.0" in temp_licenses, f"temp.cpp missing Apache-2.0 license: {temp_row.get('License')}"
|
|
175
|
+
assert (temp_row.get("Copyright Text") or "").strip(), "Test Run: temp.cpp copyright not extracted"
|
|
176
|
+
|
|
177
|
+
|
|
115
178
|
def test_help_command():
|
|
116
179
|
success, msg = run_command("fosslight_source -h")
|
|
117
180
|
assert success is True, f"Test Release: Help command failed :{msg}"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{fosslight_source-2.2.17 → fosslight_source-2.3.1}/src/fosslight_source/_parsing_scanoss_file.py
RENAMED
|
File without changes
|
{fosslight_source-2.2.17 → fosslight_source-2.3.1}/src/fosslight_source/run_manifest_extractor.py
RENAMED
|
File without changes
|
|
File without changes
|
{fosslight_source-2.2.17 → fosslight_source-2.3.1}/src/fosslight_source/run_spdx_extractor.py
RENAMED
|
File without changes
|
|
File without changes
|
{fosslight_source-2.2.17 → fosslight_source-2.3.1}/src/fosslight_source.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{fosslight_source-2.2.17 → fosslight_source-2.3.1}/src/fosslight_source.egg-info/top_level.txt
RENAMED
|
File without changes
|