sbom4python 0.12.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sbom4python/scanner.py ADDED
@@ -0,0 +1,788 @@
1
+ # Copyright (C) 2023 Anthony Harrison
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ import ast
5
+ import configparser
6
+ import pathlib
7
+ import platform
8
+ import re
9
+ import string
10
+ import subprocess
11
+ import sys
12
+ import unicodedata
13
+ from typing import Iterable
14
+
15
+ if sys.version_info >= (3, 11):
16
+ import tomllib as toml
17
+ else:
18
+ import toml
19
+
20
+ if sys.version_info >= (3, 10):
21
+ from importlib import metadata as importlib_metadata
22
+ else:
23
+ import importlib_metadata
24
+
25
+ from lib4package.metadata import Metadata
26
+ from lib4sbom.data.document import SBOMDocument
27
+ from lib4sbom.data.package import SBOMPackage
28
+ from lib4sbom.data.relationship import SBOMRelationship
29
+ from lib4sbom.license import LicenseScanner
30
+ from sbom4files.filescanner import FileScanner
31
+
32
+
33
+ class SBOMScanner:
34
+ """
35
+ Simple SBOM Generator for Python module.
36
+ """
37
+
38
def __init__(
    self,
    debug,
    include_file=False,
    exclude_license=False,
    lifecycle="build",
    include_service=False,
    use_pip=False,
    python_path: str = None,
):
    """Initialise scanner state and the SBOM helper objects.

    Args:
        debug: When truthy, diagnostic messages are printed.
        include_file: Record the files that belong to each package.
        exclude_license: Stored as ``self.include_license``.
        lifecycle: Lifecycle value written to the SBOM document.
        include_service: Scan package sources for external service usage.
        use_pip: Query ``pip`` rather than ``importlib`` metadata.
        python_path: Optional path passed to ``pip --python``. ``None`` or
            an empty string means that no path was given.
    """
    self.record = []
    self.debug = debug
    self.include_file = include_file
    # NOTE(review): attribute name is the inverse of the parameter name -
    # confirm which polarity the consumers of include_license expect.
    self.include_license = exclude_license
    self.include_service = include_service
    self.sbom_package = SBOMPackage()
    self.sbom_relationship = SBOMRelationship()
    self.sbom_document = SBOMDocument()
    self.file_scanner = FileScanner()
    self.license = LicenseScanner()
    self.sbom_files = {}
    self.sbom_packages = {}
    self.sbom_relationships = []
    self.parent = "NOT_DEFINED"
    self.package_metadata = Metadata("python", debug=self.debug)
    self.python_version = platform.python_version()
    self.set_lifecycle(lifecycle)
    self.metadata = {}
    self.use_pip = use_pip
    # pathlib.Path(None) raises TypeError and Path("") resolves to the
    # current directory (which always exists), so an unset path is kept
    # as None rather than converted.
    self.python_path = (
        pathlib.Path(python_path).expanduser() if python_path else None
    )

def set_parent(self, module):
    """Record the parent identifier as ``Python-<module>``."""
    self.parent = f"Python-{module}"

def run_pip_cmd(self, params: Iterable[str]):
    """Run ``pip`` with *params* and return the lines written to stdout.

    ``--python <path>`` is only added when a path was configured and it
    exists on disk.
    """
    cmd = ["pip"]
    if self.python_path is not None and self.python_path.exists():
        cmd.extend(("--python", str(self.python_path)))

    cmd.extend(params)
    return self.run_program(cmd)
79
+
80
def run_program(self, params: Iterable[str]):
    """Run an external command and return its standard output as lines.

    Args:
        params: Command followed by its arguments; executed without a shell.

    Returns:
        The captured stdout split into lines, or an empty list when the
        command could not be started.
    """
    command = list(params)
    try:
        res = subprocess.run(command, capture_output=True, text=True)
    except OSError as ex:
        # Typically the executable is not on PATH; callers already cope
        # with "no output", so report it instead of aborting the scan.
        if self.debug:
            print(f"[ERROR] Unable to run {command} - {ex}")
        return []
    return res.stdout.splitlines()
83
+
84
def set_lifecycle(self, lifecycle):
    """Store *lifecycle* under the ``lifecycle`` key of the SBOM document."""
    document = self.sbom_document
    document.set_value("lifecycle", lifecycle)
86
+
87
def _format_supplier(self, supplier_info, include_email=True):
    """Return a tidy supplier string built from free-form author text.

    Args:
        supplier_info: Raw author text, possibly containing email addresses.
        include_email: Append the last email address found, in brackets.

    Returns:
        The extracted name fragments joined by spaces, optionally followed
        by ``(<email>)``, with runs of spaces collapsed.
    """
    # Decompose accented characters, then drop whatever is still non-ASCII
    decomposed = unicodedata.normalize("NFKD", supplier_info)
    ascii_text = decomposed.encode("ascii", "ignore").decode("utf-8")
    if " " not in ascii_text:
        # Only a single token was supplied; use it unchanged
        name_parts = [ascii_text]
    else:
        # Name fragments, each terminated by a space
        name_parts = re.findall(r"[a-zA-Z\.\]+ [A-Za-z]+ ", ascii_text)
    # RFC-5322 style pattern (https://regex101.com/library/6EL6YF)
    addresses = re.findall(
        r"((?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|\"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*\")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\]))",
        supplier_info,
        re.IGNORECASE,
    )
    result = " ".join(name_parts)
    if include_email and addresses:
        # Only one address can be reported, so the last one wins
        result = f"{result}({addresses[-1]})"
    return re.sub(" +", " ", result.strip())
113
+
114
def _create_package(self, package, version, parent="-", requirements=None):
    """Build the SBOM entry for one package and store it in ``self.sbom_packages``.

    License, supplier, home page and summary are taken from
    ``self.metadata`` when it is populated, otherwise from the
    ``self.package_metadata`` lookup, otherwise they are left empty.

    Args:
        package: Package name used for the entry, purl, CPE and download URL.
        version: Package version, or ``None`` for an unpinned requirement.
        parent: ``"-"`` marks a top-level package, which is typed as an
            application.
        requirements: Optional evidence recorded against the package.
    """
    self.sbom_package.initialise()
    offline = False
    try:
        self.package_metadata.get_package(package, version)
    except Exception as ex:
        # Best effort: a failed lookup only disables the external metadata
        offline = True
        if self.debug:
            print(f"[ERROR] Unable to retrieve metadata for {package} - {ex}")
    self.sbom_package.set_name(package)
    self.sbom_package.set_property("language", "Python")
    self.sbom_package.set_property("python_version", self.python_version)
    if version is not None:
        self.sbom_package.set_version(version)
    if not offline:
        # External metadata may lag releases
        self.sbom_package.set_value(
            "release_date", self.package_metadata.get_latest_release_time()
        )
    if requirements is not None:
        self.sbom_package.set_evidence(requirements)
    if parent == "-":
        self.sbom_package.set_type("application")
    self.sbom_package.set_filesanalysis(self.include_file)
    # Get package metadata
    if len(self.metadata) > 0:
        license_information = self.get("License")
        supplier = self.get("Author") + " " + self.get("Author-email")
        home_page = self.get("Home-page")
        summary = self.get("Summary")
    elif not offline:
        license_information = self.package_metadata.get_license()
        # Guarded like the other lookups below: a missing value must not
        # reach len()
        if license_information is None:
            license_information = ""
        # Supplier info
        supplier = self.package_metadata.get_originator()
        if supplier is None:
            supplier = ""
        home_page = self.package_metadata.get_homepage()
        if home_page is None:
            home_page = ""
        summary = self.package_metadata.get_description()
        if summary is None:
            summary = ""
    else:
        license_information = ""
        supplier = ""
        home_page = ""
        summary = ""
    license = self.license.find_license(license_information)
    # Report license as reported by metadata. If not valid SPDX, report NOASSERTION
    if license != license_information:
        self.sbom_package.set_licensedeclared("NOASSERTION")
    else:
        self.sbom_package.set_licensedeclared(license)
    # Report license if valid SPDX identifier
    self.sbom_package.set_licenseconcluded(license)
    # Add comment if metadata license was modified
    license_comment = ""
    if len(license_information) > 0 and license != license_information:
        license_comment = f"{package} declares {license_information} which is not currently a valid SPDX License identifier or expression."
    # Report if license is deprecated
    if self.license.deprecated(license):
        deprecated_comment = f"{license} is now deprecated."
        if len(license_comment) > 0:
            license_comment = f"{license_comment} {deprecated_comment}"
        else:
            license_comment = deprecated_comment
    if len(license_comment) > 0:
        self.sbom_package.set_licensecomments(license_comment)
    # More than three words is treated as an organisation rather than a person
    if len(supplier.split()) > 3:
        self.sbom_package.set_supplier(
            "Organization", self._format_supplier(supplier)
        )
    elif len(supplier) > 1:
        self.sbom_package.set_supplier("Person", self._format_supplier(supplier))
    else:
        self.sbom_package.set_supplier("UNKNOWN", "NOASSERTION")
    if home_page != "":
        self.sbom_package.set_homepage(home_page)
    if summary != "":
        self.sbom_package.set_summary(summary)
    if self.metadata.get("Project-URL") is not None:
        # Extra references
        # Normalisation of labels
        chars_to_remove = string.punctuation + string.whitespace
        removal_map = str.maketrans("", "", chars_to_remove)
        # Various synonyms of project URLs
        categories = {
            "docs": "documentation",
            "source": "vcs",
            "repository": "vcs",
            "sourcecode": "vcs",
            "github": "vcs",
            "githubrepo": "vcs",
            "gitlab": "vcs",
            "bitbucket": "vcs",
            "git": "vcs",
            "sourceforge": "vcs",
            "svn": "vcs",
            "code": "vcs",
            "changelog": "log",
            "changes": "log",
            "docschangelog": "log",
            "whatsnew": "log",
            "issues": "issue-tracker",
            "bug": "issue-tracker",
            "bugs": "issue-tracker",
            "bugreports": "issue-tracker",
            "bugtracker": "issue-tracker",
            "issuetracker": "issue-tracker",
            "tracker": "issue-tracker",
            "githubissues": "issue-tracker",
            "mailinglist": "mailing-list",
            "mailinglists": "mailing-list",
            "sourcedistribution": "source-distribution",
            "ci": "build-system",
            "cigithub": "build-system",
            "cigithubactions": "build-system",
            "buildsystem": "build-system",
            "releasenotes": "release-notes",
            "release": "release-notes",
            "releases": "release-notes",
            "twitter": "social",
            "discord": "social",
            "home": "home-page",
            "homepage": "home-page",
            "githubhomepage": "home-page",
        }
        for ref in self.metadata.get("Project-URL"):
            parts = ref.split(", ")
            if len(parts) < 2:
                # Entry is not of the form "<label>, <url>"; nothing to record
                if self.debug:
                    print(f"Ignoring malformed Project-URL {ref}")
                continue
            category = parts[0].translate(removal_map).lower()
            locator = parts[1]
            # See if synonym
            if categories.get(category) is not None:
                if self.debug:
                    print(
                        f"Updating category from {category} to {categories[category]}"
                    )
                category = categories[category]
            if category == "home-page":
                self.sbom_package.set_homepage(locator)
            else:
                self.sbom_package.set_externalreference("OTHER", category, locator)
    if self.metadata.get("Download-URL") is None:
        if version is None:
            self.sbom_package.set_downloadlocation(
                f"https://pypi.org/project/{package}/#files"
            )
        else:
            self.sbom_package.set_downloadlocation(
                f"https://pypi.org/project/{package}/{version}/#files"
            )
    else:
        self.sbom_package.set_downloadlocation(self.metadata.get("Download-URL"))
    # External references
    if version is not None:
        self.sbom_package.set_purl(f"pkg:pypi/{package}@{version}")
    else:
        self.sbom_package.set_purl(f"pkg:pypi/{package}")
    if len(supplier) > 1:
        component_supplier = self._format_supplier(supplier, include_email=False)
        if version is not None:
            # ':' is the CPE field separator, so it is escaped in the version
            cpe_version = version.replace(":", "\\:")
        else:
            cpe_version = ""
        self.sbom_package.set_cpe(
            f"cpe:2.3:a:{component_supplier.replace(' ', '_').lower()}:{package}:{cpe_version}:*:*:*:*:*:*:*"
        )
    checksum, checksum_algorithm = self.package_metadata.get_checksum(
        version=version
    )
    if checksum is not None:
        self.sbom_package.set_checksum(checksum_algorithm, checksum)
    # Copyright
    self.sbom_package.set_copyrighttext("NOASSERTION")
    # Store package data, keyed by (name, version)
    self.sbom_packages[
        (
            self.sbom_package.get_name(),
            self.sbom_package.get_value("version"),
        )
    ] = self.sbom_package.get_package()
294
+
295
def _create_relationship(self, package, parent="-"):
    """Append a relationship for *package* to ``self.sbom_relationships``.

    A *parent* of ``"-"`` links the package to ``self.parent`` with
    ``DESCRIBES``; any other value produces ``<parent lower-cased>
    DEPENDS_ON <package>``.
    """
    relationship = self.sbom_relationship
    relationship.initialise()
    if parent == "-":
        relationship.set_relationship(self.parent, "DESCRIBES", package)
    else:
        relationship.set_relationship(parent.lower(), "DEPENDS_ON", package)
    self.sbom_relationships.append(relationship.get_relationship())
304
+
305
def analyze_code(self, filename):
    """Analyzes Python code for potential external service interactions.

    URL-like string constants are only reported when the file also uses
    ``get``/``post``/``put``/``delete`` on one of the known HTTP modules.

    Args:
        filename: The Python source file.

    Returns:
        A list of potential external service endpoints; empty when the
        file cannot be read or parsed.
    """
    modules = ("requests", "urllib", "httplib2")
    verbs = ("get", "post", "put", "delete")
    potential_external_services = []
    potential_endpoint = []
    try:
        with open(filename, "r", errors="replace") as f:
            source_code = f.read()
        tree = ast.parse(source_code)
    except FileNotFoundError:
        print(f"[ERROR] {filename} not found")
        return []
    except (OSError, SyntaxError, ValueError, RecursionError):
        # Directories, unreadable files and non-Python content (including
        # data with NUL bytes) are expected when walking a package tree.
        return []

    for node in ast.walk(tree):
        if isinstance(node, ast.Attribute):
            # Check for function calls on http libraries like requests or urllib
            if (
                isinstance(node.value, ast.Name)
                and node.value.id in modules
                and node.attr in verbs
            ):
                usage = [node.value.id, node.attr]
                if usage not in potential_external_services:
                    potential_external_services.append(usage)
        elif isinstance(node, ast.Constant) and node.value is not None:
            constant = str(node.value)
            if (
                constant.startswith("http")
                and "//" in constant
                and len(constant) > 8
            ):
                potential_endpoint.append(constant)

    if not potential_external_services or not potential_endpoint:
        return []
    if self.debug:
        print(f"Potential endpoint in {filename}")
        for i in potential_endpoint:
            print(i)
        for i in potential_external_services:
            print(i)
    return potential_endpoint
363
+
364
def _extract_package_name(self, requirement_string):
    """Return the distribution name at the start of a requirement string.

    Args:
        requirement_string: A requirement such as ``requests>=2.0`` or
            ``pytest ; extra == 'test'``.

    Returns:
        The package name, or ``""`` when the requirement only applies to
        an optional extra or does not start with a valid name.
    """
    requirement, _, marker = requirement_string.partition(";")
    # Ignore optional dependencies: only the environment marker counts, so
    # a package whose name merely contains "extra" is still reported.
    if re.search(r"\bextra\b", marker):
        return ""
    # Names start and end with an alphanumeric and may contain . - _ between
    match = re.match(
        r"\s*([A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?)", requirement
    )
    return match.group(1) if match else ""
373
+
374
def _extract_package_names(self, requirements_list):
    """Apply ``_extract_package_name`` to every requirement, preserving order."""
    return list(map(self._extract_package_name, requirements_list))
376
+
377
def _getpackage_metadata(self, module):
    """Collect metadata for an installed module.

    With ``self.use_pip`` set, the ``key: value`` lines printed by
    ``pip show <module>`` are stored as-is. Otherwise the metadata of the
    installed distribution is read and reduced to a subset of fields,
    plus ``Project-URL`` and a comma separated ``Requires`` string.

    Returns:
        A dictionary of metadata; empty when the module is not found.
    """
    metadata = {}
    if self.use_pip:
        for line in self.run_pip_cmd(("show", module)):
            keyword, _, remainder = line.partition(":")
            colons = line.count(":")
            # One ':' needs a non-blank value; more than one usually means
            # the value itself contains a URL
            single_entry = colons == 1 and remainder and not remainder.isspace()
            if single_entry or colons > 1:
                metadata[keyword] = remainder.strip()
    else:
        if self.debug:
            print(f"Retrieve metadata for {module}")
        try:
            package_data = importlib_metadata.metadata(module)
        except importlib_metadata.PackageNotFoundError:
            if self.debug:
                print(f"Package Not Found : {module}")
            package_data = []
        if len(package_data) == 0:
            if self.debug:
                print(f"Unable to retrieve metadata for {module}")
            return metadata
        package_metadata = dict(package_data)
        if self.debug:
            print(f"Package metadata for {module}")
            for key, value in package_metadata.items():
                print(key, value)
        # Store subset of metadata (same as pip show <module>)
        wanted = (
            "Name",
            "Version",
            "Summary",
            "Home-page",
            "Author",
            "Author-email",
            "License",
            "Download-URL",
        )
        for attribute in wanted:
            field = package_metadata.get(attribute)
            if field is not None:
                metadata[attribute] = field
        # License-Expression is preferred to License
        expression = package_metadata.get("License-Expression")
        if expression is not None:
            metadata["License"] = expression
        # Project-URL (multiple)
        if package_metadata.get("Project-URL"):
            metadata["Project-URL"] = package_data.get_all("Project-URL")
        # Requires-Dist (multiple)
        requires = (
            package_data.get_all("Requires-Dist")
            if package_metadata.get("Requires-Dist")
            else None
        )
        # Discard placeholder license text such as 'see license file'
        declared = metadata.get("License")
        if declared is not None and (
            declared == "UNKNOWN" or "see license" in declared.lower()
        ):
            metadata["License"] = None
        # Use classifier if no license
        if (
            metadata.get("License") is None
            and package_metadata.get("Classifier") is not None
        ):
            for classifier in package_data.get_all("Classifier"):
                if not classifier.startswith("License"):
                    continue
                # Extract license from classifier
                license_name = classifier.split("::")[-1].strip()
                current = metadata.get("License")
                if current is None:
                    metadata["License"] = license_name
                else:
                    metadata["License"] = f'{current} AND {license_name}'
        # Extract dependencies (if any)
        if requires is None:
            metadata["Requires"] = ""
        else:
            if self.debug:
                print(f"Dependencies for {module} - {requires}")
            package_names = self._extract_package_names(requires)
            # Empty names mark optional extras and are left out
            metadata["Requires"] = ", ".join(
                name.split(" ")[0] for name in package_names if len(name) > 0
            )
    if self.debug:
        print(f"Metadata for {module} - {metadata}")
    return metadata
479
+
480
def process_module(self, module, parent="-"):
    """Add an installed module, and optionally its files, to the SBOM.

    Args:
        module: Name of the installed module.
        parent: Name of the depending package, ``"-"`` for a top-level one.

    Returns:
        ``True`` when a new package entry was created; ``False`` when the
        module is unknown or its name and version were already recorded.
    """
    if self.debug:
        print(f"Process Module {module}")
    self.metadata = self._getpackage_metadata(module.strip())
    # If module not found, no metadata returned
    if len(self.metadata) == 0:
        if self.debug:
            print(f"Module {module} not found")
        return False

    package = self.get("Name").lower().replace("_", "-")
    version = self.get("Version")
    if (package, version) in self.sbom_packages:
        if self.debug:
            print(f"Already processed {package} {version}")
        # Prevent metadata being reprocessed
        self.metadata = {}
        return False

    self._create_package(package, version, parent)
    self._create_relationship(package, parent)
    if self.include_file:
        # Directory name uses '_' where the distribution name uses '-'
        package = self.get("Name").lower().replace("-", "_")
        directory_location = f'{self.get("Location")}/{package}'
        file_dir = pathlib.Path(directory_location)
        if self.debug:
            print(f"Directory for {package}: {file_dir}")
        if file_dir.exists():
            filtered = list(file_dir.glob("**/*"))
        else:
            # Module is only a single file
            filtered = [file_dir]
        if self.debug:
            print(f"Filenames: {filtered}")
        for entry in filtered:
            # Ignore compiled code
            if str(entry).endswith(".pyc"):
                continue
            if self.debug:
                print(f"Analyse file in {entry}")
            if self.include_service:
                external_services = self.analyze_code(entry)
                if len(external_services) > 0:
                    print(f"External services in {entry}")

            if not self.file_scanner.scan_file(entry):
                continue
            scanned_name = self.file_scanner.get_name()
            self.sbom_files[scanned_name] = self.file_scanner.get_file()
            # Add relationship
            self.sbom_relationship.initialise()
            self.sbom_relationship.set_relationship(
                package, "CONTAINS", self.file_scanner.get_name()
            )
            self.sbom_relationship.set_relationship_id(
                self.sbom_package.get_value("id"),
                self.file_scanner.get_value("id"),
            )
            self.sbom_relationship.set_target_type("file")
            self.sbom_relationships.append(
                self.sbom_relationship.get_relationship()
            )
    return len(self.metadata) > 0
540
+
541
def get(self, attribute):
    """Return the current metadata value for *attribute* without leading whitespace.

    Missing attributes and attributes stored as ``None`` yield ``""``.
    """
    value = self.metadata.get(attribute)
    return "" if value is None else value.lstrip()
545
+
546
def get_files(self):
    """Return the file records collected so far, keyed by scanned file name."""
    files = self.sbom_files
    return files
548
+
549
def get_packages(self):
    """Return the package records collected so far, keyed by ``(name, version)``."""
    packages = self.sbom_packages
    return packages
551
+
552
def get_relationships(self):
    """Return the list of recorded relationships, printing it first in debug mode."""
    relationships = self.sbom_relationships
    if self.debug:
        print(relationships)
    return relationships
556
+
557
def get_document(self):
    """Return whatever ``self.sbom_document.get_document()`` produces."""
    document = self.sbom_document.get_document()
    return document
559
+
560
def get_parent(self):
    """Return the parent identifier (``"NOT_DEFINED"`` until ``set_parent`` is called)."""
    parent = self.parent
    return parent
562
+
563
def analyze(self, parent, dependencies):
    """Recursively process a comma separated list of dependencies.

    Args:
        parent: Name of the package that requires the dependencies.
        dependencies: Comma separated module names; may be empty.
    """
    if len(dependencies) == 0:
        return
    for requirement in dependencies.split(","):
        if not self.process_module(requirement, parent):
            continue
        # Descend into the dependencies of the module just recorded
        self.analyze(requirement.strip(), self.get("Requires"))
570
+
571
def process_python_module(self, module_name):
    """Generate SBOM data for *module_name* and everything it requires."""
    self.set_parent(module_name)
    if not self.process_module(module_name):
        return
    self.analyze(self.get("Name"), self.get("Requires"))
575
+
576
def _get_installed_modules(self):
    """Return the names of the installed distributions.

    With ``self.use_pip`` set, the names are the first column of
    ``pip list``; otherwise they come from the installed distribution
    metadata and are sorted.
    """
    modules = []
    if self.use_pip:
        out = self.run_pip_cmd(("list",))
        # Ignore headers in output stream
        modules = [line.split(" ")[0] for line in out[2:]]
    else:
        for distribution in importlib_metadata.distributions():
            info = distribution.metadata
            # Damaged installs can lack a Name; sorting None against str
            # would raise, so such entries are skipped.
            name = info.get("Name") if info is not None else None
            if name:
                modules.append(name)
        modules.sort()
    if self.debug:
        print(modules)
    return modules
593
+
594
def process_system(self):
    """Generate SBOM data for every installed module under the parent ``system``."""
    installed = self._get_installed_modules()
    self.set_parent("system")
    for module_name in installed:
        if not self.process_module(module_name):
            continue
        self.analyze(self.get("Name"), self.get("Requires"))
600
+
601
def process_requirements(self, filename):
    """Dispatch a dependency file to the matching parser(s) by file extension.

    A ``.toml`` file is offered to both the pyproject and the pylock
    parser, since it could be either.
    """
    dispatch = (
        (".toml", (self.process_pyproject, self.process_pylock)),
        (".cfg", (self.process_setup_cfg,)),
        (".py", (self.process_setup_py,)),
        (".txt", (self.process_requirements_file,)),
        (".lock", (self.process_uvlock_file,)),
    )
    for suffix, handlers in dispatch:
        if filename.endswith(suffix):
            for handler in handlers:
                handler(filename)
            return
    if self.debug:
        print(f"Unable to process requirements file {filename}")
616
+
617
def _process_requirement_dependency(self, dependency, filename):
    """Record one requirement line as a package plus a top-level relationship.

    Args:
        dependency: A single requirement, e.g. ``requests==2.31.0`` or
            ``flask>=2 ; python_version >= "3.8"``. Trailing ``#``
            comments are ignored.
        filename: The file the requirement came from, stored as evidence.
    """
    dependency = dependency.split("#")[0].strip()
    # Blank lines, comment lines and pip options (-r, -e, --index-url, ...)
    # do not name a package
    if len(dependency) == 0 or dependency.startswith("-"):
        return
    # Ignore anything after ; e.g. python_version<"3.8"
    element = dependency.split(";")[0].strip()
    # Check for pinned dependency
    component = [part.strip() for part in element.split("==")]
    if len(component) == 2 and component[0] and component[1]:
        # Package and version found; extras such as [socks] are not part
        # of the package name
        package = component[0].split("[")[0].strip()
        version = component[1]
        if self.debug:
            print(f"Processing {package} version {version}")
    else:
        # Not pinned version
        package = self._extract_package_name(element.split(" ")[0])
        version = None
        if self.debug:
            print(f"Processing {package}")
    self._create_package(package, version, requirements=filename)
    self._create_relationship(package)
638
+
639
def process_requirements_file(self, filename):
    """Process a requirements.txt style file, one requirement per line.

    Nothing happens when *filename* is empty or does not name an
    existing file.
    """
    if len(filename) == 0:
        return
    requirements_path = pathlib.Path(filename)
    # Check path exists and is a valid file
    if not (requirements_path.exists() and requirements_path.is_file()):
        return
    with open(filename) as handle:
        entries = handle.readlines()
    self.set_lifecycle("pre-build")
    self.set_parent(filename)
    for entry in entries:
        self._process_requirement_dependency(entry, filename)
652
+
653
+ def process_pyproject(self, filename):
654
+ # Process pyproject.toml file
655
+ if len(filename) > 0:
656
+ # Check file exists
657
+ filePath = pathlib.Path(filename)
658
+ # Check path exists and is a valid file
659
+ if filePath.exists() and filePath.is_file():
660
+ with open(filename, "rb") as file:
661
+ pyproject_data = toml.load(file)
662
+ if "project" in pyproject_data:
663
+ if "dependencies" in pyproject_data["project"]:
664
+ dependencies = pyproject_data["project"]["dependencies"]
665
+ if self.debug:
666
+ print(dependencies)
667
+ self.set_lifecycle("pre-build")
668
+ self.set_parent(filename)
669
+ for dependency in dependencies:
670
+ self._process_requirement_dependency(
671
+ dependency, filename
672
+ )
673
+
674
def process_setup_cfg(self, filename):
    """Process the ``install_requires`` entry of the ``[options]`` section of setup.cfg.

    Each line of the entry is handed on as one requirement.
    """
    if len(filename) == 0:
        return
    cfg_path = pathlib.Path(filename)
    # Check path exists and is a valid file
    if not (cfg_path.exists() and cfg_path.is_file()):
        return
    parser = configparser.ConfigParser()
    parser.read(filename)
    if "options" not in parser.sections():
        return
    options = parser["options"]
    if "install_requires" not in options:
        return
    requirement_block = options["install_requires"]
    if self.debug:
        print(requirement_block)
    self.set_lifecycle("pre-build")
    self.set_parent(filename)
    for requirement in requirement_block.splitlines():
        self._process_requirement_dependency(requirement, filename)
692
+
693
def process_setup_py(self, filename):
    """Process the ``install_requires`` declaration of a setup.py file.

    Two textual forms are recognised: a list literal of quoted
    requirements, and a triple-quoted string followed by ``.split()``.
    The file is only read, never executed.
    """
    if len(filename) == 0:
        return
    filePath = pathlib.Path(filename)
    # Check path exists and is a valid file
    if not filePath.is_file():
        return
    with open(filename, "r") as setup_file:
        content = setup_file.read()
    dependencies = []
    # Method 1: list literal. Quoted strings and comments are consumed as
    # units so that a "]" inside them (extras such as pkg[socks]) does not
    # end the list early. The plain pattern is kept as a fallback.
    match = re.search(
        r"""install_requires\s*=\s*\[((?:[^\]"'#]|#[^\n]*\n|"[^"\n]*"|'[^'\n]*')*)\]""",
        content,
    ) or re.search(r"install_requires\s*=\s*\[([^\]]+)\]", content)
    if match:
        # Take each quoted string whole: splitting on "," would break
        # specifiers such as "pkg>=1,<2". Comments are matched first so
        # that quotes inside them are not mistaken for requirements.
        tokens = re.findall(
            r"""#[^\n]*|"([^"\n]*)"|'([^'\n]*)'""", match.group(1)
        )
        dependencies = [
            dep.strip()
            for dep in (double or single for double, single in tokens)
            if dep.strip()
        ]
    # Method 2: Handle multiline string with .split()
    # Handles: install_requires = """package==1.0\npackage2>=2.0""".split()
    # Also handles single quotes: install_requires = '''...'''.split()
    if not dependencies:
        split_match = re.search(
            r'install_requires\s*=\s*["\'"]{3}([^"\']+)["\'"]{3}\.split\(\)',
            content,
            re.DOTALL,
        )
        if split_match:
            # Split by newlines and filter out empty lines and comments
            deps_block = split_match.group(1).strip()
            dependencies = [
                line.strip()
                for line in deps_block.split("\n")
                if line.strip() and not line.strip().startswith("#")
            ]
    if self.debug:
        print(dependencies)
    self.set_lifecycle("pre-build")
    self.set_parent(filename)
    for dependency in dependencies:
        self._process_requirement_dependency(dependency, filename)
737
+
738
+ def process_pylock(self, filename):
739
+ # Process pylock.toml file
740
+ if len(filename) > 0:
741
+ # Check file exists
742
+ filePath = pathlib.Path(filename)
743
+ # Check path exists and is a valid file
744
+ if filePath.exists() and filePath.is_file():
745
+ with open(filename, "rb") as file:
746
+ pylock_data = toml.load(file)
747
+ if "lock-version" in pylock_data:
748
+ if self.debug:
749
+ print(pylock_data)
750
+ if "packages" in pylock_data:
751
+ self.set_lifecycle("pre-build")
752
+ self.set_parent(filename)
753
+ for package in pylock_data["packages"]:
754
+ if "version" in package:
755
+ self._process_requirement_dependency(
756
+ f"{package['name']}=={package['version']}",
757
+ filename,
758
+ )
759
+ if "dependencies" in package:
760
+ for dependency in package["dependencies"]:
761
+ self._create_relationship(
762
+ dependency["name"], package["name"]
763
+ )
764
+
765
+ def process_uvlock_file(self, filename):
766
+ # Process uv.lock file
767
+ if len(filename) > 0:
768
+ # Check file exists
769
+ filePath = pathlib.Path(filename)
770
+ # Check path exists and is a valid file
771
+ if filePath.exists() and filePath.is_file():
772
+ with open(filename, "rb") as file:
773
+ uvlock_data = toml.load(file)
774
+ if self.debug:
775
+ print(uvlock_data)
776
+ if "package" in uvlock_data:
777
+ self.set_lifecycle("build")
778
+ self.set_parent(filename)
779
+ for package in uvlock_data["package"]:
780
+ if "version" in package:
781
+ self._process_requirement_dependency(
782
+ f"{package['name']}=={package['version']}", filename
783
+ )
784
+ if "dependencies" in package:
785
+ for dependency in package["dependencies"]:
786
+ self._create_relationship(
787
+ dependency["name"], package["name"]
788
+ )