sbom4python 0.12.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sbom4python/__init__.py +2 -0
- sbom4python/cli.py +215 -0
- sbom4python/license.py +54 -0
- sbom4python/license_data/spdx_licenses.json +6258 -0
- sbom4python/scanner.py +788 -0
- sbom4python/version.py +4 -0
- sbom4python-0.12.5.dist-info/LICENSE +201 -0
- sbom4python-0.12.5.dist-info/METADATA +192 -0
- sbom4python-0.12.5.dist-info/RECORD +14 -0
- sbom4python-0.12.5.dist-info/WHEEL +5 -0
- sbom4python-0.12.5.dist-info/entry_points.txt +2 -0
- sbom4python-0.12.5.dist-info/top_level.txt +1 -0
- test/__init__.py +2 -0
- test/test_license.py +62 -0
sbom4python/scanner.py
ADDED
|
@@ -0,0 +1,788 @@
|
|
|
1
|
+
# Copyright (C) 2023 Anthony Harrison
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
import ast
|
|
5
|
+
import configparser
|
|
6
|
+
import pathlib
|
|
7
|
+
import platform
|
|
8
|
+
import re
|
|
9
|
+
import string
|
|
10
|
+
import subprocess
|
|
11
|
+
import sys
|
|
12
|
+
import unicodedata
|
|
13
|
+
from typing import Iterable
|
|
14
|
+
|
|
15
|
+
if sys.version_info >= (3, 11):
|
|
16
|
+
import tomllib as toml
|
|
17
|
+
else:
|
|
18
|
+
import toml
|
|
19
|
+
|
|
20
|
+
if sys.version_info >= (3, 10):
|
|
21
|
+
from importlib import metadata as importlib_metadata
|
|
22
|
+
else:
|
|
23
|
+
import importlib_metadata
|
|
24
|
+
|
|
25
|
+
from lib4package.metadata import Metadata
|
|
26
|
+
from lib4sbom.data.document import SBOMDocument
|
|
27
|
+
from lib4sbom.data.package import SBOMPackage
|
|
28
|
+
from lib4sbom.data.relationship import SBOMRelationship
|
|
29
|
+
from lib4sbom.license import LicenseScanner
|
|
30
|
+
from sbom4files.filescanner import FileScanner
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class SBOMScanner:
|
|
34
|
+
"""
|
|
35
|
+
Simple SBOM Generator for Python module.
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
def __init__(
    self,
    debug,
    include_file=False,
    exclude_license=False,
    lifecycle="build",
    include_service=False,
    use_pip=False,
    python_path: str = None,
):
    """Create an empty scanner and the SBOM helper objects it fills in.

    Args:
        debug: When true, diagnostic output is printed; also forwarded to
            the package metadata client.
        include_file: Stored as ``self.include_file``.
        exclude_license: Stored as ``self.include_license`` (see note below).
        lifecycle: Lifecycle phase recorded on the SBOM document.
        include_service: Stored as ``self.include_service``.
        use_pip: Stored as ``self.use_pip``.
        python_path: Optional path to a Python interpreter/environment.
            ``~`` is expanded. ``None`` means no path was supplied.
    """
    self.record = []
    self.debug = debug
    self.include_file = include_file
    # NOTE(review): the attribute name says "include" but it stores the
    # "exclude" flag unchanged - confirm the intended polarity with callers.
    self.include_license = exclude_license
    self.include_service = include_service
    self.sbom_package = SBOMPackage()
    self.sbom_relationship = SBOMRelationship()
    self.sbom_document = SBOMDocument()
    self.file_scanner = FileScanner()
    self.license = LicenseScanner()
    self.sbom_files = {}
    self.sbom_packages = {}
    self.sbom_relationships = []
    self.parent = "NOT_DEFINED"
    self.package_metadata = Metadata("python", debug=self.debug)
    self.python_version = platform.python_version()
    self.set_lifecycle(lifecycle)
    self.metadata = {}
    self.use_pip = use_pip
    # pathlib.Path(None) raises TypeError, so the default argument used to
    # make the constructor unusable; keep None when no path was given.
    if python_path is None:
        self.python_path = None
    else:
        self.python_path = pathlib.Path(python_path).expanduser()
|
|
68
|
+
|
|
69
|
+
def set_parent(self, module):
    """Record the name of the SBOM's root element, prefixed with ``Python-``."""
    parent_name = "Python-{}".format(module)
    self.parent = parent_name
|
|
71
|
+
|
|
72
|
+
def run_pip_cmd(self, params: Iterable[str]):
    """Run ``pip`` with *params* and return its standard output as lines.

    When ``self.python_path`` is set and exists on disk, it is passed to
    pip through the ``--python`` option so that pip inspects that
    environment.

    Args:
        params: Arguments appended after the (optional) ``--python`` option.

    Returns:
        Whatever ``self.run_program`` returns for the assembled command.
    """
    cmd = ["pip"]
    # python_path may be None when no interpreter was requested; calling
    # .exists() on it unguarded raised AttributeError.
    python_path = self.python_path
    if python_path is not None and python_path.exists():
        cmd.extend(("--python", str(python_path)))

    cmd.extend(params)
    return self.run_program(cmd)
|
|
79
|
+
|
|
80
|
+
def run_program(self, params: Iterable[str]):
    """Execute a command and return its captured stdout split into lines.

    Args:
        params: The program followed by its arguments.

    Returns:
        A list of output lines (without line terminators).
    """
    command = [*params]
    completed = subprocess.run(command, capture_output=True, text=True)
    captured = completed.stdout
    return captured.splitlines()
|
|
83
|
+
|
|
84
|
+
def set_lifecycle(self, lifecycle):
    """Store *lifecycle* under the ``lifecycle`` key of the SBOM document."""
    document = self.sbom_document
    document.set_value("lifecycle", lifecycle)
|
|
86
|
+
|
|
87
|
+
def _format_supplier(self, supplier_info, include_email=True):
|
|
88
|
+
# See https://stackoverflow.com/questions/1207457/convert-a-unicode-string-to-a-string-in-python-containing-extra-symbols
|
|
89
|
+
# And convert byte object to a string
|
|
90
|
+
name_str = (
|
|
91
|
+
unicodedata.normalize("NFKD", supplier_info)
|
|
92
|
+
.encode("ascii", "ignore")
|
|
93
|
+
.decode("utf-8")
|
|
94
|
+
)
|
|
95
|
+
if " " in name_str:
|
|
96
|
+
# Get names assumed to be at least two names <first> <surname>
|
|
97
|
+
names = re.findall(r"[a-zA-Z\.\]+ [A-Za-z]+ ", name_str)
|
|
98
|
+
else:
|
|
99
|
+
# Handle case where only single name provided
|
|
100
|
+
names = [name_str]
|
|
101
|
+
# Get email addresses
|
|
102
|
+
# Use RFC-5322 compliant regex (https://regex101.com/library/6EL6YF)
|
|
103
|
+
emails = re.findall(
|
|
104
|
+
r"((?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|\"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*\")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\]))",
|
|
105
|
+
supplier_info,
|
|
106
|
+
re.IGNORECASE,
|
|
107
|
+
)
|
|
108
|
+
supplier = " ".join(n for n in names)
|
|
109
|
+
if include_email and len(emails) > 0:
|
|
110
|
+
# Only one email can be specified, so choose last one
|
|
111
|
+
supplier = supplier + "(" + emails[-1] + ")"
|
|
112
|
+
return re.sub(" +", " ", supplier.strip())
|
|
113
|
+
|
|
114
|
+
def _create_package(self, package, version, parent="-", requirements=None):
    """Populate an SBOM package record and store it in ``self.sbom_packages``.

    Details come from ``self.metadata`` when it is non-empty, otherwise from
    the external package metadata service when it could be reached,
    otherwise they are left empty.

    Args:
        package: Package name.
        version: Package version, or ``None`` when not known.
        parent: ``"-"`` marks a top-level package, typed as an application.
        requirements: Optional evidence recorded on the package (callers
            pass the name of the file that declared the dependency).
    """
    self.sbom_package.initialise()
    offline = False
    try:
        self.package_metadata.get_package(package, version)
    except Exception as ex:
        # Best effort: carry on without external metadata.
        offline = True
        if self.debug:
            print(f"[ERROR] Unable to retrieve metadata for {package} - {ex}")
    self.sbom_package.set_name(package)
    self.sbom_package.set_property("language", "Python")
    self.sbom_package.set_property("python_version", self.python_version)
    if version is not None:
        self.sbom_package.set_version(version)
    if not offline:
        # External metadata may lag releases
        self.sbom_package.set_value(
            "release_date", self.package_metadata.get_latest_release_time()
        )
    if requirements is not None:
        self.sbom_package.set_evidence(requirements)
    if parent == "-":
        self.sbom_package.set_type("application")
    self.sbom_package.set_filesanalysis(self.include_file)
    # Get package metadata
    if len(self.metadata) > 0:
        license_information = self.get("License")
        supplier = self.get("Author") + " " + self.get("Author-email")
        home_page = self.get("Home-page")
        summary = self.get("Summary")
    elif not offline:
        license_information = self.package_metadata.get_license()
        # Guard against a missing licence the same way as the other
        # fields, otherwise len() below fails on None.
        if license_information is None:
            license_information = ""
        # Supplier info
        supplier = self.package_metadata.get_originator()
        if supplier is None:
            supplier = ""
        home_page = self.package_metadata.get_homepage()
        if home_page is None:
            home_page = ""
        summary = self.package_metadata.get_description()
        if summary is None:
            summary = ""
    else:
        license_information = ""
        supplier = ""
        home_page = ""
        summary = ""
    license_id = self.license.find_license(license_information)
    # Report license as reported by metadata. If not valid SPDX, report NOASSERTION
    license_changed = license_id != license_information
    if license_changed:
        self.sbom_package.set_licensedeclared("NOASSERTION")
    else:
        self.sbom_package.set_licensedeclared(license_id)
    # Report license if valid SPDX identifier
    self.sbom_package.set_licenseconcluded(license_id)
    # Add comment if metadata license was modified
    license_comment = ""
    if len(license_information) > 0 and license_changed:
        license_comment = f"{package} declares {license_information} which is not currently a valid SPDX License identifier or expression."
    # Report if license is deprecated
    if self.license.deprecated(license_id):
        deprecated_comment = f"{license_id} is now deprecated."
        if len(license_comment) > 0:
            license_comment = f"{license_comment} {deprecated_comment}"
        else:
            license_comment = deprecated_comment
    if len(license_comment) > 0:
        self.sbom_package.set_licensecomments(license_comment)
    # More than three words is treated as an organisation name.
    if len(supplier.split()) > 3:
        self.sbom_package.set_supplier(
            "Organization", self._format_supplier(supplier)
        )
    elif len(supplier) > 1:
        self.sbom_package.set_supplier("Person", self._format_supplier(supplier))
    else:
        self.sbom_package.set_supplier("UNKNOWN", "NOASSERTION")
    if home_page != "":
        self.sbom_package.set_homepage(home_page)
    if summary != "":
        self.sbom_package.set_summary(summary)
    project_urls = self.metadata.get("Project-URL")
    if project_urls is not None:
        # Extra references
        # Normalisation of labels
        chars_to_remove = string.punctuation + string.whitespace
        removal_map = str.maketrans("", "", chars_to_remove)
        # Various synonyms of project URLs
        categories = {
            "docs": "documentation",
            "source": "vcs",
            "repository": "vcs",
            "sourcecode": "vcs",
            "github": "vcs",
            "githubrepo": "vcs",
            "gitlab": "vcs",
            "bitbucket": "vcs",
            "git": "vcs",
            "sourceforge": "vcs",
            "svn": "vcs",
            "code": "vcs",
            "changelog": "log",
            "changes": "log",
            "docschangelog": "log",
            "whatsnew": "log",
            "issues": "issue-tracker",
            "bug": "issue-tracker",
            "bugs": "issue-tracker",
            "bugreports": "issue-tracker",
            "bugtracker": "issue-tracker",
            "issuetracker": "issue-tracker",
            "tracker": "issue-tracker",
            "githubissues": "issue-tracker",
            "mailinglist": "mailing-list",
            "mailinglists": "mailing-list",
            "sourcedistribution": "source-distribution",
            "ci": "build-system",
            "cigithub": "build-system",
            "cigithubactions": "build-system",
            # Previously misspelt as "build-systen"
            "buildsystem": "build-system",
            "releasenotes": "release-notes",
            "release": "release-notes",
            "releases": "release-notes",
            "twitter": "social",
            "discord": "social",
            "home": "home-page",
            "homepage": "home-page",
            "githubhomepage": "home-page",
        }
        for ref in project_urls:
            # Entries have the form "<label>, <url>"
            label, separator, locator = ref.partition(", ")
            if not separator:
                # Malformed entry; indexing the split result used to raise
                # IndexError here.
                if self.debug:
                    print(f"Ignoring malformed Project-URL entry {ref}")
                continue
            category = label.translate(removal_map).lower()
            # See if synonym
            synonym = categories.get(category)
            if synonym is not None:
                if self.debug:
                    print(f"Updating category from {category} to {synonym}")
                category = synonym
            if category == "home-page":
                self.sbom_package.set_homepage(locator)
            else:
                self.sbom_package.set_externalreference("OTHER", category, locator)
    download_url = self.metadata.get("Download-URL")
    if download_url is None:
        if version is None:
            self.sbom_package.set_downloadlocation(
                f"https://pypi.org/project/{package}/#files"
            )
        else:
            self.sbom_package.set_downloadlocation(
                f"https://pypi.org/project/{package}/{version}/#files"
            )
    else:
        self.sbom_package.set_downloadlocation(download_url)
    # External references
    if version is not None:
        self.sbom_package.set_purl(f"pkg:pypi/{package}@{version}")
    else:
        self.sbom_package.set_purl(f"pkg:pypi/{package}")
    if len(supplier) > 1:
        component_supplier = self._format_supplier(supplier, include_email=False)
        if version is not None:
            # ':' separates CPE fields, so escape it inside the version
            cpe_version = version.replace(":", "\\:")
        else:
            cpe_version = ""
        self.sbom_package.set_cpe(
            f"cpe:2.3:a:{component_supplier.replace(' ', '_').lower()}:{package}:{cpe_version}:*:*:*:*:*:*:*"
        )
    checksum, checksum_algorithm = self.package_metadata.get_checksum(
        version=version
    )
    if checksum is not None:
        self.sbom_package.set_checksum(checksum_algorithm, checksum)
    # Copyright
    self.sbom_package.set_copyrighttext("NOASSERTION")
    # Store package data, keyed by (name, version)
    package_key = (
        self.sbom_package.get_name(),
        self.sbom_package.get_value("version"),
    )
    self.sbom_packages[package_key] = self.sbom_package.get_package()
|
|
294
|
+
|
|
295
|
+
def _create_relationship(self, package, parent="-"):
|
|
296
|
+
self.sbom_relationship.initialise()
|
|
297
|
+
if parent != "-":
|
|
298
|
+
self.sbom_relationship.set_relationship(
|
|
299
|
+
parent.lower(), "DEPENDS_ON", package
|
|
300
|
+
)
|
|
301
|
+
else:
|
|
302
|
+
self.sbom_relationship.set_relationship(self.parent, "DESCRIBES", package)
|
|
303
|
+
self.sbom_relationships.append(self.sbom_relationship.get_relationship())
|
|
304
|
+
|
|
305
|
+
def analyze_code(self, filename):
    """Analyzes Python code for potential external service interactions.

    A file is reported only when it both calls ``get``/``post``/``put``/
    ``delete`` on a known HTTP module name and contains a URL-like constant.

    Args:
        filename: The Python source file.

    Returns:
        The URL-like constants found in the file, or an empty list when the
        file cannot be read or parsed, or shows no HTTP call.
    """
    http_modules = ("requests", "urllib", "httplib2")
    http_verbs = ("get", "post", "put", "delete")
    potential_external_services = []
    potential_endpoint = []
    try:
        with open(filename, "r", errors="replace") as f:
            source_code = f.read()
        tree = ast.parse(source_code)
    except FileNotFoundError:
        print(f"[ERROR] {filename} not found")
        return []
    except (OSError, SyntaxError, ValueError):
        # Directories and unreadable files raise OSError; non-Python
        # content raises SyntaxError, or ValueError on NUL bytes in older
        # interpreters. None of them can contain a service call.
        return []

    for node in ast.walk(tree):
        if isinstance(node, ast.Attribute):
            # Check for function calls on http libraries like requests or urllib
            target = node.value
            if (
                isinstance(target, ast.Name)
                and target.id in http_modules
                and node.attr in http_verbs
            ):
                service = [target.id, node.attr]
                if service not in potential_external_services:
                    potential_external_services.append(service)
        elif isinstance(node, ast.Constant) and node.value is not None:
            constant = str(node.value)
            if (
                constant.startswith("http")
                and "//" in constant
                and len(constant) > 8
            ):
                potential_endpoint.append(constant)

    if not potential_external_services or not potential_endpoint:
        return []
    if self.debug:
        print(f"Potential endpoint in {filename}")
        for endpoint in potential_endpoint:
            print(endpoint)
        for service in potential_external_services:
            print(service)
    return potential_endpoint
|
|
363
|
+
|
|
364
|
+
def _extract_package_name(self, requirement_string):
|
|
365
|
+
for i, char in enumerate(requirement_string):
|
|
366
|
+
# Ignore optional dependencies
|
|
367
|
+
if "extra" in requirement_string:
|
|
368
|
+
return ""
|
|
369
|
+
# Paqckage names only contain alphanumeric characters and -_
|
|
370
|
+
if not char.isalnum() and char not in ["-", "_"]:
|
|
371
|
+
return requirement_string[:i]
|
|
372
|
+
return requirement_string
|
|
373
|
+
|
|
374
|
+
def _extract_package_names(self, requirements_list):
|
|
375
|
+
return [self._extract_package_name(req) for req in requirements_list]
|
|
376
|
+
|
|
377
|
+
def _getpackage_metadata(self, module):
|
|
378
|
+
metadata = {}
|
|
379
|
+
if self.use_pip:
|
|
380
|
+
out = self.run_pip_cmd(("show", module))
|
|
381
|
+
for line in out:
|
|
382
|
+
entry = line.split(":")
|
|
383
|
+
# If: this line contain an non-empty entry delimited by ':'
|
|
384
|
+
if (len(entry) == 2) and (entry[1] and not (entry[1].isspace())):
|
|
385
|
+
# Store all data after keyword
|
|
386
|
+
metadata[entry[0]] = (
|
|
387
|
+
line.split(f"{entry[0]}:", 1)[1].strip().rstrip("\n")
|
|
388
|
+
)
|
|
389
|
+
elif len(entry) > 2:
|
|
390
|
+
# Likely to include URL
|
|
391
|
+
metadata[entry[0]] = (
|
|
392
|
+
line.split(f"{entry[0]}:", 1)[1].strip().rstrip("\n")
|
|
393
|
+
)
|
|
394
|
+
else:
|
|
395
|
+
try:
|
|
396
|
+
if self.debug:
|
|
397
|
+
print(f"Retrieve metadata for {module}")
|
|
398
|
+
package_data = importlib_metadata.metadata(module)
|
|
399
|
+
except importlib_metadata.PackageNotFoundError:
|
|
400
|
+
if self.debug:
|
|
401
|
+
print(f"Package Not Found : {module}")
|
|
402
|
+
package_data = []
|
|
403
|
+
if len(package_data) == 0:
|
|
404
|
+
if self.debug:
|
|
405
|
+
print(f"Unable to retrieve metadata for {module}")
|
|
406
|
+
return metadata
|
|
407
|
+
package_metadata = dict(package_data)
|
|
408
|
+
if self.debug:
|
|
409
|
+
print(f"Package metadata for {module}")
|
|
410
|
+
for key, value in package_metadata.items():
|
|
411
|
+
print(key, value)
|
|
412
|
+
# Store subset of metadata (same as pip show <module>)
|
|
413
|
+
for attribute in [
|
|
414
|
+
"Name",
|
|
415
|
+
"Version",
|
|
416
|
+
"Summary",
|
|
417
|
+
"Home-page",
|
|
418
|
+
"Author",
|
|
419
|
+
"Author-email",
|
|
420
|
+
"License",
|
|
421
|
+
"Download-URL",
|
|
422
|
+
]:
|
|
423
|
+
if package_metadata.get(attribute) is not None:
|
|
424
|
+
metadata[attribute] = package_metadata[attribute]
|
|
425
|
+
# License-Expresssion is preferred to License
|
|
426
|
+
if package_metadata.get("License-Expression") is not None:
|
|
427
|
+
metadata["License"] = package_metadata["License-Expression"]
|
|
428
|
+
# Project-URL (multiple)
|
|
429
|
+
if package_metadata.get("Project-URL"):
|
|
430
|
+
metadata["Project-URL"] = package_data.get_all("Project-URL")
|
|
431
|
+
# Requires-Dist (multiple)
|
|
432
|
+
if package_metadata.get("Requires-Dist"):
|
|
433
|
+
requires = package_data.get_all("Requires-Dist")
|
|
434
|
+
else:
|
|
435
|
+
requires = None
|
|
436
|
+
# Use classifier if no license
|
|
437
|
+
if metadata.get("License") is not None:
|
|
438
|
+
if metadata["License"] == "UNKNOWN":
|
|
439
|
+
metadata["License"] = None
|
|
440
|
+
elif "see license" in metadata["License"].lower():
|
|
441
|
+
# If license has text similar to 'see license file', reset
|
|
442
|
+
metadata["License"] = None
|
|
443
|
+
if (
|
|
444
|
+
metadata.get("License") is None
|
|
445
|
+
and package_metadata.get("Classifier") is not None
|
|
446
|
+
):
|
|
447
|
+
for i in package_data.get_all("Classifier"):
|
|
448
|
+
if i.startswith("License"):
|
|
449
|
+
# Extract license from classifier
|
|
450
|
+
license_name = i.split("::")[-1].strip()
|
|
451
|
+
if metadata.get("License") is None:
|
|
452
|
+
metadata["License"] = license_name
|
|
453
|
+
else:
|
|
454
|
+
metadata[
|
|
455
|
+
"License"
|
|
456
|
+
] = f'{metadata["License"]} AND {license_name}'
|
|
457
|
+
# Extract dependencies (if any)
|
|
458
|
+
if requires is not None:
|
|
459
|
+
# Find dependent packages
|
|
460
|
+
if self.debug:
|
|
461
|
+
print(f"Dependencies for {module} - {requires}")
|
|
462
|
+
|
|
463
|
+
package_names = self._extract_package_names(requires)
|
|
464
|
+
|
|
465
|
+
package_dependendents = ""
|
|
466
|
+
for name in package_names:
|
|
467
|
+
# Ignore extra packages
|
|
468
|
+
if len(name) > 0:
|
|
469
|
+
package_dependendents = (
|
|
470
|
+
package_dependendents + name.split(" ")[0] + ", "
|
|
471
|
+
)
|
|
472
|
+
# Remove extra punctuation
|
|
473
|
+
metadata["Requires"] = package_dependendents[:-2]
|
|
474
|
+
else:
|
|
475
|
+
metadata["Requires"] = ""
|
|
476
|
+
if self.debug:
|
|
477
|
+
print(f"Metadata for {module} - {metadata}")
|
|
478
|
+
return metadata
|
|
479
|
+
|
|
480
|
+
def process_module(self, module, parent="-"):
    """Add an installed module, and optionally its files, to the SBOM.

    Args:
        module: Distribution name (surrounding whitespace is ignored).
        parent: Name of the package that depends on this module, or ``"-"``
            for a top-level module.

    Returns:
        True when the module was found and had not been processed before.
    """
    if self.debug:
        print(f"Process Module {module}")
    self.metadata = self._getpackage_metadata(module.strip())
    # If module not found, no metadata returned
    if len(self.metadata) == 0:
        if self.debug:
            print(f"Module {module} not found")
        return len(self.metadata) > 0
    package = self.get("Name").lower().replace("_", "-")
    version = self.get("Version")
    if (package, version) in self.sbom_packages:
        if self.debug:
            print(f"Already processed {package} {version}")
        # Prevent metadata being reprocessed
        self.metadata = {}
        return len(self.metadata) > 0
    self._create_package(package, version, parent)
    self._create_relationship(package, parent)
    if self.include_file:
        # The directory name uses underscores where the package name has dashes
        package = self.get("Name").lower().replace("-", "_")
        location = self.get("Location")
        file_dir = pathlib.Path(f"{location}/{package}")
        if self.debug:
            print(f"Directory for {package}: {file_dir}")
        if file_dir.exists():
            filtered = list(file_dir.glob("**/*"))
        else:
            # Module is only a single file
            filtered = [pathlib.Path(f"{location}/{package}")]
        if self.debug:
            print(f"Filenames: {filtered}")
        for entry in filtered:
            # Ignore compiled code
            if str(entry).endswith(".pyc"):
                continue
            if self.debug:
                print(f"Analyse file in {entry}")
            if self.include_service:
                external_services = self.analyze_code(entry)
                if len(external_services) > 0:
                    print(f"External services in {entry}")

            if not self.file_scanner.scan_file(entry):
                continue
            file_name = self.file_scanner.get_name()
            self.sbom_files[file_name] = self.file_scanner.get_file()
            # Add relationship
            self.sbom_relationship.initialise()
            self.sbom_relationship.set_relationship(
                package, "CONTAINS", self.file_scanner.get_name()
            )
            self.sbom_relationship.set_relationship_id(
                self.sbom_package.get_value("id"),
                self.file_scanner.get_value("id"),
            )
            self.sbom_relationship.set_target_type("file")
            self.sbom_relationships.append(
                self.sbom_relationship.get_relationship()
            )
    return len(self.metadata) > 0
|
|
540
|
+
|
|
541
|
+
def get(self, attribute):
    """Return a metadata value with leading whitespace removed.

    Returns ``""`` when *attribute* is absent from ``self.metadata`` or is
    stored as ``None``.
    """
    value = self.metadata.get(attribute)
    if value is None:
        return ""
    return value.lstrip()
|
|
545
|
+
|
|
546
|
+
def get_files(self):
    """Return the dict of scanned files, keyed by the scanner's file name."""
    files = self.sbom_files
    return files
|
|
548
|
+
|
|
549
|
+
def get_packages(self):
    """Return the dict of recorded packages, keyed by ``(name, version)``."""
    packages = self.sbom_packages
    return packages
|
|
551
|
+
|
|
552
|
+
def get_relationships(self):
    """Return the list of recorded relationships, printing it in debug mode."""
    relationships = self.sbom_relationships
    if self.debug:
        print(relationships)
    return relationships
|
|
556
|
+
|
|
557
|
+
def get_document(self):
    """Return the document data held by ``self.sbom_document``."""
    document = self.sbom_document.get_document()
    return document
|
|
559
|
+
|
|
560
|
+
def get_parent(self):
    """Return the name of the SBOM's root element."""
    parent = self.parent
    return parent
|
|
562
|
+
|
|
563
|
+
def analyze(self, parent, dependencies):
    """Recursively process a comma separated list of dependencies.

    Args:
        parent: Name of the package that declares the dependencies.
        dependencies: Comma separated package names; may be empty.
    """
    if not len(dependencies):
        return
    for requirement in dependencies.split(","):
        if not self.process_module(requirement, parent):
            continue
        # process_module has loaded this requirement's metadata, so
        # "Requires" now refers to its own dependencies.
        self.analyze(requirement.strip(), self.get("Requires"))
|
|
570
|
+
|
|
571
|
+
def process_python_module(self, module_name):
    """Build the SBOM for one installed module and its dependencies."""
    self.set_parent(module_name)
    if not self.process_module(module_name):
        return
    self.analyze(self.get("Name"), self.get("Requires"))
|
|
575
|
+
|
|
576
|
+
def _get_installed_modules(self):
|
|
577
|
+
modules = []
|
|
578
|
+
if self.use_pip:
|
|
579
|
+
out = self.run_pip_cmd(("list",))
|
|
580
|
+
if len(out) > 0:
|
|
581
|
+
# Ignore headers in output stream
|
|
582
|
+
for m in out[2:]:
|
|
583
|
+
modules.append(m.split(" ")[0])
|
|
584
|
+
else:
|
|
585
|
+
installed_packages_info = importlib_metadata.distributions()
|
|
586
|
+
# modules = sorted(
|
|
587
|
+
# [p.metadata["Name"].lower() for p in installed_packages_info]
|
|
588
|
+
# )
|
|
589
|
+
modules = sorted([p.metadata["Name"] for p in installed_packages_info])
|
|
590
|
+
if self.debug:
|
|
591
|
+
print(modules)
|
|
592
|
+
return modules
|
|
593
|
+
|
|
594
|
+
def process_system(self):
    """Build the SBOM for every installed module, rooted at ``system``."""
    installed = self._get_installed_modules()
    self.set_parent("system")
    for module_name in installed:
        if not self.process_module(module_name):
            continue
        self.analyze(self.get("Name"), self.get("Requires"))
|
|
600
|
+
|
|
601
|
+
def process_requirements(self, filename):
    """Dispatch a dependency file to the matching parser by its extension.

    A ``.toml`` file could be a pyproject or a pylock file, so both parsers
    are tried. Unsupported extensions are reported in debug mode only.
    """
    handlers = (
        (".toml", ("process_pyproject", "process_pylock")),
        (".cfg", ("process_setup_cfg",)),
        (".py", ("process_setup_py",)),
        (".txt", ("process_requirements_file",)),
        (".lock", ("process_uvlock_file",)),
    )
    for suffix, method_names in handlers:
        if filename.endswith(suffix):
            for method_name in method_names:
                getattr(self, method_name)(filename)
            return
    if self.debug:
        print(f"Unable to process requirements file {filename}")
|
|
616
|
+
|
|
617
|
+
def _process_requirement_dependency(self, dependency, filename):
|
|
618
|
+
dependency = dependency.split("#")[0].strip()
|
|
619
|
+
if len(dependency) > 0:
|
|
620
|
+
# Ignore anything after ; e.g. python_version<"3.8"
|
|
621
|
+
element = dependency.strip().split(";")[0]
|
|
622
|
+
# Check for pinned dependency
|
|
623
|
+
component = element.split("==")
|
|
624
|
+
if len(component) == 2:
|
|
625
|
+
# Package and version found
|
|
626
|
+
package = component[0]
|
|
627
|
+
version = component[1]
|
|
628
|
+
if self.debug:
|
|
629
|
+
print(f"Processing {package} version {version}")
|
|
630
|
+
else:
|
|
631
|
+
# Not pinned version
|
|
632
|
+
package = self._extract_package_name(element.split(" ")[0])
|
|
633
|
+
version = None
|
|
634
|
+
if self.debug:
|
|
635
|
+
print(f"Processing {package}")
|
|
636
|
+
self._create_package(package, version, requirements=filename)
|
|
637
|
+
self._create_relationship(package)
|
|
638
|
+
|
|
639
|
+
def process_requirements_file(self, filename):
    """Process a requirements.txt file, one requirement per line.

    Nothing happens when *filename* is empty or is not an existing file.
    """
    if len(filename) == 0:
        return
    requirements_path = pathlib.Path(filename)
    # Check path exists and is a valid file
    if not (requirements_path.exists() and requirements_path.is_file()):
        return
    with open(filename) as dir_file:
        lines = dir_file.readlines()
    self.set_lifecycle("pre-build")
    self.set_parent(filename)
    for line in lines:
        self._process_requirement_dependency(line, filename)
|
|
652
|
+
|
|
653
|
+
def process_pyproject(self, filename):
|
|
654
|
+
# Process pyproject.toml file
|
|
655
|
+
if len(filename) > 0:
|
|
656
|
+
# Check file exists
|
|
657
|
+
filePath = pathlib.Path(filename)
|
|
658
|
+
# Check path exists and is a valid file
|
|
659
|
+
if filePath.exists() and filePath.is_file():
|
|
660
|
+
with open(filename, "rb") as file:
|
|
661
|
+
pyproject_data = toml.load(file)
|
|
662
|
+
if "project" in pyproject_data:
|
|
663
|
+
if "dependencies" in pyproject_data["project"]:
|
|
664
|
+
dependencies = pyproject_data["project"]["dependencies"]
|
|
665
|
+
if self.debug:
|
|
666
|
+
print(dependencies)
|
|
667
|
+
self.set_lifecycle("pre-build")
|
|
668
|
+
self.set_parent(filename)
|
|
669
|
+
for dependency in dependencies:
|
|
670
|
+
self._process_requirement_dependency(
|
|
671
|
+
dependency, filename
|
|
672
|
+
)
|
|
673
|
+
|
|
674
|
+
def process_setup_cfg(self, filename):
    """Process the ``[options] install_requires`` entry of a setup.cfg file.

    Nothing happens when *filename* is empty, is not an existing file, or
    has no ``install_requires`` option.
    """
    if len(filename) == 0:
        return
    cfg_path = pathlib.Path(filename)
    # Check path exists and is a valid file
    if not (cfg_path.exists() and cfg_path.is_file()):
        return
    config = configparser.ConfigParser()
    config.read(filename)
    if "options" not in config.sections():
        return
    options = config["options"]
    if "install_requires" not in options:
        return
    dependencies = options["install_requires"]
    if self.debug:
        print(dependencies)
    self.set_lifecycle("pre-build")
    self.set_parent(filename)
    # The option value holds one requirement per line
    for dependency in dependencies.splitlines():
        self._process_requirement_dependency(dependency, filename)
|
|
692
|
+
|
|
693
|
+
def process_setup_py(self, filename):
    """Process the ``install_requires`` declaration of a setup.py file.

    The file is searched as text, not executed. Two forms are recognised:
    a list literal, and a triple-quoted string followed by ``.split()``.
    Nothing happens when *filename* is empty or is not an existing file.
    """
    if len(filename) == 0:
        return
    setup_path = pathlib.Path(filename)
    # Check path exists and is a valid file
    if not (setup_path.exists() and setup_path.is_file()):
        return
    with open(filename, "r") as setup_file:
        content = setup_file.read()
    dependencies = []
    # Method 1: list literal. Newlines are removed so that a list spread
    # over several lines is matched as one stream.
    stream = content.replace("\n", "")
    list_match = re.search(r"install_requires\s*=\s*\[([^\]]+)\]", stream)
    if list_match:
        for dep in list_match.group(1).strip().split(","):
            if len(dep) > 0:
                dependencies.append(dep.strip().replace('"', "").replace("'", ""))
    # Method 2: Handle multiline string with .split()
    # Handles: install_requires = """package==1.0\npackage2>=2.0""".split()
    # Also handles single quotes: install_requires = '''...'''.split()
    if not dependencies:
        split_match = re.search(
            r'install_requires\s*=\s*["\'"]{3}([^"\']+)["\'"]{3}\.split\(\)',
            content,
            re.DOTALL,
        )
        if split_match:
            # Keep non-empty lines that are not comments
            for raw_line in split_match.group(1).strip().split("\n"):
                entry = raw_line.strip()
                if entry and not entry.startswith("#"):
                    dependencies.append(entry)
    if self.debug:
        print(dependencies)
    self.set_lifecycle("pre-build")
    self.set_parent(filename)
    for dependency in dependencies:
        self._process_requirement_dependency(dependency, filename)
|
|
737
|
+
|
|
738
|
+
def process_pylock(self, filename):
|
|
739
|
+
# Process pylock.toml file
|
|
740
|
+
if len(filename) > 0:
|
|
741
|
+
# Check file exists
|
|
742
|
+
filePath = pathlib.Path(filename)
|
|
743
|
+
# Check path exists and is a valid file
|
|
744
|
+
if filePath.exists() and filePath.is_file():
|
|
745
|
+
with open(filename, "rb") as file:
|
|
746
|
+
pylock_data = toml.load(file)
|
|
747
|
+
if "lock-version" in pylock_data:
|
|
748
|
+
if self.debug:
|
|
749
|
+
print(pylock_data)
|
|
750
|
+
if "packages" in pylock_data:
|
|
751
|
+
self.set_lifecycle("pre-build")
|
|
752
|
+
self.set_parent(filename)
|
|
753
|
+
for package in pylock_data["packages"]:
|
|
754
|
+
if "version" in package:
|
|
755
|
+
self._process_requirement_dependency(
|
|
756
|
+
f"{package['name']}=={package['version']}",
|
|
757
|
+
filename,
|
|
758
|
+
)
|
|
759
|
+
if "dependencies" in package:
|
|
760
|
+
for dependency in package["dependencies"]:
|
|
761
|
+
self._create_relationship(
|
|
762
|
+
dependency["name"], package["name"]
|
|
763
|
+
)
|
|
764
|
+
|
|
765
|
+
def process_uvlock_file(self, filename):
|
|
766
|
+
# Process uv.lock file
|
|
767
|
+
if len(filename) > 0:
|
|
768
|
+
# Check file exists
|
|
769
|
+
filePath = pathlib.Path(filename)
|
|
770
|
+
# Check path exists and is a valid file
|
|
771
|
+
if filePath.exists() and filePath.is_file():
|
|
772
|
+
with open(filename, "rb") as file:
|
|
773
|
+
uvlock_data = toml.load(file)
|
|
774
|
+
if self.debug:
|
|
775
|
+
print(uvlock_data)
|
|
776
|
+
if "package" in uvlock_data:
|
|
777
|
+
self.set_lifecycle("build")
|
|
778
|
+
self.set_parent(filename)
|
|
779
|
+
for package in uvlock_data["package"]:
|
|
780
|
+
if "version" in package:
|
|
781
|
+
self._process_requirement_dependency(
|
|
782
|
+
f"{package['name']}=={package['version']}", filename
|
|
783
|
+
)
|
|
784
|
+
if "dependencies" in package:
|
|
785
|
+
for dependency in package["dependencies"]:
|
|
786
|
+
self._create_relationship(
|
|
787
|
+
dependency["name"], package["name"]
|
|
788
|
+
)
|