debsbom 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. debsbom-0.1.0/LICENSE +8 -0
  2. debsbom-0.1.0/PKG-INFO +162 -0
  3. debsbom-0.1.0/README.md +122 -0
  4. debsbom-0.1.0/pyproject.toml +84 -0
  5. debsbom-0.1.0/setup.cfg +4 -0
  6. debsbom-0.1.0/src/debsbom/__init__.py +14 -0
  7. debsbom-0.1.0/src/debsbom/apt/cache.py +189 -0
  8. debsbom-0.1.0/src/debsbom/cli.py +474 -0
  9. debsbom-0.1.0/src/debsbom/download/__init__.py +12 -0
  10. debsbom-0.1.0/src/debsbom/download/cdx.py +68 -0
  11. debsbom-0.1.0/src/debsbom/download/download.py +143 -0
  12. debsbom-0.1.0/src/debsbom/download/merger.py +115 -0
  13. debsbom-0.1.0/src/debsbom/download/resolver.py +177 -0
  14. debsbom-0.1.0/src/debsbom/download/spdx.py +78 -0
  15. debsbom-0.1.0/src/debsbom/dpkg/package.py +447 -0
  16. debsbom-0.1.0/src/debsbom/generate/__init__.py +5 -0
  17. debsbom-0.1.0/src/debsbom/generate/cdx.py +239 -0
  18. debsbom-0.1.0/src/debsbom/generate/generate.py +219 -0
  19. debsbom-0.1.0/src/debsbom/generate/spdx.py +261 -0
  20. debsbom-0.1.0/src/debsbom/repack/__init__.py +5 -0
  21. debsbom-0.1.0/src/debsbom/repack/cdx.py +72 -0
  22. debsbom-0.1.0/src/debsbom/repack/packer.py +143 -0
  23. debsbom-0.1.0/src/debsbom/repack/spdx.py +79 -0
  24. debsbom-0.1.0/src/debsbom/sbom.py +143 -0
  25. debsbom-0.1.0/src/debsbom/snapshot/__init__.py +0 -0
  26. debsbom-0.1.0/src/debsbom/snapshot/client.py +236 -0
  27. debsbom-0.1.0/src/debsbom/util/__init__.py +5 -0
  28. debsbom-0.1.0/src/debsbom/util/compression.py +82 -0
  29. debsbom-0.1.0/src/debsbom.egg-info/PKG-INFO +162 -0
  30. debsbom-0.1.0/src/debsbom.egg-info/SOURCES.txt +37 -0
  31. debsbom-0.1.0/src/debsbom.egg-info/dependency_links.txt +1 -0
  32. debsbom-0.1.0/src/debsbom.egg-info/entry_points.txt +2 -0
  33. debsbom-0.1.0/src/debsbom.egg-info/requires.txt +25 -0
  34. debsbom-0.1.0/src/debsbom.egg-info/top_level.txt +1 -0
  35. debsbom-0.1.0/tests/test_download.py +218 -0
  36. debsbom-0.1.0/tests/test_dpkg.py +147 -0
  37. debsbom-0.1.0/tests/test_generation.py +273 -0
  38. debsbom-0.1.0/tests/test_snapshot.py +79 -0
  39. debsbom-0.1.0/tests/test_source_merger.py +67 -0
debsbom-0.1.0/LICENSE ADDED
@@ -0,0 +1,8 @@
1
+ Copyright (c) 2025 Siemens
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8
+
debsbom-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,162 @@
1
+ Metadata-Version: 2.4
2
+ Name: debsbom
3
+ Version: 0.1.0
4
+ Summary: Generate SBOMs for Debian-based distributions.
5
+ Author-email: Christoph Steiger <christoph.steiger@siemens.com>
6
+ Maintainer-email: Christoph Steiger <christoph.steiger@siemens.com>, Gernot Hillier <gernot.hillier@siemens.com>
7
+ License-Expression: MIT
8
+ Project-URL: Homepage, https://github.com/siemens/debsbom
9
+ Project-URL: Repository, https://github.com/siemens/debsbom.git
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Operating System :: POSIX :: Linux
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Topic :: System :: Operating System Kernels :: Linux
14
+ Classifier: Topic :: Utilities
15
+ Requires-Python: >=3.11
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Requires-Dist: cyclonedx-python-lib>=11.0.0
19
+ Requires-Dist: packageurl-python>=0.16.0
20
+ Requires-Dist: spdx-tools>=0.8.3
21
+ Requires-Dist: python-debian>=0.1.49
22
+ Provides-Extra: dev
23
+ Requires-Dist: pytest>=6.0; extra == "dev"
24
+ Requires-Dist: pytest-cov>=6; extra == "dev"
25
+ Requires-Dist: black; extra == "dev"
26
+ Requires-Dist: beartype>=0.20; extra == "dev"
27
+ Requires-Dist: debsbom[download]; extra == "dev"
28
+ Requires-Dist: debsbom[doc]; extra == "dev"
29
+ Provides-Extra: download
30
+ Requires-Dist: requests>=2.25.1; extra == "download"
31
+ Requires-Dist: zstandard>=0.20; extra == "download"
32
+ Provides-Extra: apt
33
+ Requires-Dist: python3-apt>=2.6.0; extra == "apt"
34
+ Provides-Extra: doc
35
+ Requires-Dist: sphinx<8,>=7; extra == "doc"
36
+ Requires-Dist: sphinx-rtd-theme>=2.0.0; extra == "doc"
37
+ Requires-Dist: sphinx-autodoc-typehints>=2.0.0; extra == "doc"
38
+ Requires-Dist: sphinx-argparse>=0.2.1; extra == "doc"
39
+ Dynamic: license-file
40
+
41
+ [![Tests](https://github.com/siemens/debsbom/actions/workflows/test.yml/badge.svg)](https://github.com/siemens/debsbom/actions/workflows/test.yml)
42
+ [![Documentation](https://github.com/siemens/debsbom/actions/workflows/docs.yml/badge.svg)](https://siemens.github.io/debsbom)
43
+
44
+ # debsbom - SBOM generator for Debian-based distributions
45
+
46
+ `debsbom` generates SBOMs (Software Bill of Materials) for distributions based on Debian in the two standard formats [SPDX](https://www.spdx.org) and [CycloneDX](https://www.cyclonedx.org).
47
+
48
+ The generated SBOM includes all installed binary packages and also contains [Debian Source packages](https://www.debian.org/doc/debian-policy/ch-source.html).
49
+
50
+ Source packages are especially relevant for security as CVEs in the Debian ecosystem are filed not against the installed binary packages, but source packages. The names of source and binary packages must not always be the same, and in some cases a single source package builds a number of binary packages.
51
+
52
+ ## Usage
53
+
54
+ ```
55
+ usage: debsbom [-h] [--version] [-v] [--progress] {generate,download,source-merge,repack} ...
56
+
57
+ SBOM tool for Debian systems.
58
+
59
+ positional arguments:
60
+ {generate,download,source-merge,repack}
61
+ sub command help
62
+ generate generate a SBOM for a Debian system
63
+ download download referenced packages
64
+ source-merge merge referenced source packages
65
+ repack repack sources and sbom
66
+
67
+ options:
68
+ -h, --help show this help message and exit
69
+ --version show program's version number and exit
70
+ -v, --verbose be more verbose
71
+ --progress report progress
72
+ ```
73
+
74
+ ## Scope of the tool
75
+
76
+ The primary goal is to generate Software Bills of Materials (SBOMs) for Debian-based systems, focusing on security and license clearing requirements.
77
+ The `generate` command operates entirely offline, making it suitable for use in air-gapped networks or environments where internet connectivity is restricted.
78
+
79
+ ### Goals
80
+
81
+ The `generate` command creates comprehensive SBOMs that include all installed software packages and their dependencies (binary, source package and
82
+ `built-using`[[1]](https://www.debian.org/doc/debian-policy/ch-relationships.html#s-built-using)).
83
+ These SBOM outputs are designed to serve as reliable input for vulnerability management systems and license compliance checks.
84
+
85
+ The tool provides auxiliary commands for package source retrieval. These enable users to:
86
+ 1. Retrieve packages from Debian's upstream repositories and report missing packages.
87
+ 2. Convert the multi-archive source packages into a single artifact (one archive per source package)
88
+
89
+ At its core, this tool was designed to fulfill these SBOM generation requirements while maintaining:
90
+ 1. A minimal dependency footprint: avoid huge dependency graph of external software ecosystems (like Go or Rust)
91
+ 2. Strict focus on Debian-specific package formats
92
+ 3. Clear separation between binary packages and their corresponding source packages
93
+ 4. Use official SPDX / CycloneDX libraries to ensure syntactic and semantic correctness
94
+
95
+ ### Non Goals
96
+
97
+ - License and copyright text extraction from source packages
98
+ - Real-time vulnerability database integration
99
+ - Signing and attestation of generated artifacts
100
+
101
+ ## Package Relations
102
+
103
+ A Debian distribution is composed of source packages and binary packages.
104
+ Binary packages are installed into the root filesystem, while the source packages are the originals from which those binaries are built.
105
+
106
+ Some binary packages are installed explicitly by the user; others appear automatically as dependencies of the explicitly‑installed packages.
107
+ The SBOM mirrors this relationship, using the `distro-package` entry as the single central node for traversing the package graph.
108
+
109
+ ```
110
+ distro-package
111
+ ├─ binary-package-foo
112
+ ├─── source-package-foo
113
+ ├─── binary-dep-of-foo
114
+ ├─ binary-package-bar
115
+ ├─── source-package-bar
116
+ └─── binary-dep-of-bar
117
+ ```
118
+
119
+ ### Source-Binary
120
+
121
+ To differentiate binary and source packages in the SBOM a different approach for each SBOM standard is required.
122
+
123
+ #### CycloneDX
124
+
125
+ In the CDX format it is currently not possible to mark a component as a source package. There is an ongoing discussion [[2]](https://github.com/CycloneDX/specification/issues/612) which, while looking promising, will not land in the standard for quite some time. In the meantime source packages can only be identified by their PURL by looking at the `arch=source` qualifier. The relationships between a binary and its source package is done with a simple dependency.
126
+
127
+ #### SPDX
128
+
129
+ We differentiate a source package by setting `"primaryPackagePurpose": "SOURCE"` as opposed to `LIBRARY` for binary packages. Their relationship is expressed with the `GENERATES` relation. For packages that are marked as `Built-Using` in the dpkg status file, we use the `GENERATED_FROM` relation. This expresses the same semantic in SPDX, but this way it can still be identified if it is a proper source/binary relationship or a built-using one.
130
+
131
+ ## Generate from Package List
132
+
133
+ In addition to parsing the list of installed packages from the dpkg status file, we also support to provide a list of packages that should be resolved
134
+ from the apt-cache: When running `debsbom generate --from-pkglist`, the tool
135
+ processes package entries passed via stdin as line separated items. The format
136
+ for each entry is:
137
+
138
+ ```
139
+ <package-name> <package-version> <package-arch>
140
+ ```
141
+
142
+ Example:
143
+
144
+ ```bash
145
+ cat <<EOF | debsbom generate --from-pkglist -t cdx -t spdx -o sbom
146
+ cpp 4:15.2.0-4 amd64
147
+ htop 3.4.1-5 amd64
148
+ EOF
149
+ ```
150
+
151
+ ## Limitations
152
+
153
+ ### License Information
154
+
155
+ License information in Debian is stored in `/usr/share/doc/**/copyright`. The format of these files is not required to be machine-interpretable. For most packages this is the case and they are machine-readable, but there are some cases where the exact license determination is hard.
156
+ To prevent any false license information to be included in the SBOM they are not emitted for now.
157
+
158
+ ### Vendor Packages
159
+
160
+ Vendor packages are currently not identified. Identifying them is important to emit the correct PURL. Right now we make no difference between vendor and official packages. That means we emit potentially incorrect PURLs for vendor packages.
161
+
162
+ Reliably and correctly identifying if a package is a vendor package or not is non-trivial without access to the internet. For this reason we do not attempt it. If you have vendor packages in your distribution we assume you know them, and if not you can identify them in postprocessing. A simple way is to use `debsbom download` and look for any packages that failed to download, or whose checksums do not match.
@@ -0,0 +1,122 @@
1
+ [![Tests](https://github.com/siemens/debsbom/actions/workflows/test.yml/badge.svg)](https://github.com/siemens/debsbom/actions/workflows/test.yml)
2
+ [![Documentation](https://github.com/siemens/debsbom/actions/workflows/docs.yml/badge.svg)](https://siemens.github.io/debsbom)
3
+
4
+ # debsbom - SBOM generator for Debian-based distributions
5
+
6
+ `debsbom` generates SBOMs (Software Bill of Materials) for distributions based on Debian in the two standard formats [SPDX](https://www.spdx.org) and [CycloneDX](https://www.cyclonedx.org).
7
+
8
+ The generated SBOM includes all installed binary packages and also contains [Debian Source packages](https://www.debian.org/doc/debian-policy/ch-source.html).
9
+
10
+ Source packages are especially relevant for security as CVEs in the Debian ecosystem are filed not against the installed binary packages, but against source packages. The names of source and binary packages need not always be the same, and in some cases a single source package builds a number of binary packages.
11
+
12
+ ## Usage
13
+
14
+ ```
15
+ usage: debsbom [-h] [--version] [-v] [--progress] {generate,download,source-merge,repack} ...
16
+
17
+ SBOM tool for Debian systems.
18
+
19
+ positional arguments:
20
+ {generate,download,source-merge,repack}
21
+ sub command help
22
+ generate generate a SBOM for a Debian system
23
+ download download referenced packages
24
+ source-merge merge referenced source packages
25
+ repack repack sources and sbom
26
+
27
+ options:
28
+ -h, --help show this help message and exit
29
+ --version show program's version number and exit
30
+ -v, --verbose be more verbose
31
+ --progress report progress
32
+ ```
33
+
34
+ ## Scope of the tool
35
+
36
+ The primary goal is to generate Software Bills of Materials (SBOMs) for Debian-based systems, focusing on security and license clearing requirements.
37
+ The `generate` command operates entirely offline, making it suitable for use in air-gapped networks or environments where internet connectivity is restricted.
38
+
39
+ ### Goals
40
+
41
+ The `generate` command creates comprehensive SBOMs that include all installed software packages and their dependencies (binary, source package and
42
+ `built-using`[[1]](https://www.debian.org/doc/debian-policy/ch-relationships.html#s-built-using)).
43
+ These SBOM outputs are designed to serve as reliable input for vulnerability management systems and license compliance checks.
44
+
45
+ The tool provides auxiliary commands for package source retrieval. These enable users to:
46
+ 1. Retrieve packages from Debian's upstream repositories and report missing packages.
47
+ 2. Convert the multi-archive source packages into a single artifact (one archive per source package)
48
+
49
+ At its core, this tool was designed to fulfill these SBOM generation requirements while maintaining:
50
+ 1. A minimal dependency footprint: avoid huge dependency graph of external software ecosystems (like Go or Rust)
51
+ 2. Strict focus on Debian-specific package formats
52
+ 3. Clear separation between binary packages and their corresponding source packages
53
+ 4. Use official SPDX / CycloneDX libraries to ensure syntactic and semantic correctness
54
+
55
+ ### Non Goals
56
+
57
+ - License and copyright text extraction from source packages
58
+ - Real-time vulnerability database integration
59
+ - Signing and attestation of generated artifacts
60
+
61
+ ## Package Relations
62
+
63
+ A Debian distribution is composed of source packages and binary packages.
64
+ Binary packages are installed into the root filesystem, while the source packages are the originals from which those binaries are built.
65
+
66
+ Some binary packages are installed explicitly by the user; others appear automatically as dependencies of the explicitly‑installed packages.
67
+ The SBOM mirrors this relationship, using the `distro-package` entry as the single central node for traversing the package graph.
68
+
69
+ ```
70
+ distro-package
71
+ ├─ binary-package-foo
72
+ ├─── source-package-foo
73
+ ├─── binary-dep-of-foo
74
+ ├─ binary-package-bar
75
+ ├─── source-package-bar
76
+ └─── binary-dep-of-bar
77
+ ```
78
+
79
+ ### Source-Binary
80
+
81
+ To differentiate binary and source packages in the SBOM a different approach for each SBOM standard is required.
82
+
83
+ #### CycloneDX
84
+
85
+ In the CDX format it is currently not possible to mark a component as a source package. There is an ongoing discussion [[2]](https://github.com/CycloneDX/specification/issues/612) which, while looking promising, will not land in the standard for quite some time. In the meantime source packages can only be identified by their PURL by looking at the `arch=source` qualifier. The relationship between a binary and its source package is expressed as a simple dependency.
86
+
87
+ #### SPDX
88
+
89
+ We differentiate a source package by setting `"primaryPackagePurpose": "SOURCE"` as opposed to `LIBRARY` for binary packages. Their relationship is expressed with the `GENERATES` relation. For packages that are marked as `Built-Using` in the dpkg status file, we use the `GENERATED_FROM` relation. This expresses the same semantic in SPDX, but this way it can still be identified if it is a proper source/binary relationship or a built-using one.
90
+
91
+ ## Generate from Package List
92
+
93
+ In addition to parsing the list of installed packages from the dpkg status file, we also support providing a list of packages that should be resolved
94
+ from the apt-cache: When running `debsbom generate --from-pkglist`, the tool
95
+ processes package entries passed via stdin as line separated items. The format
96
+ for each entry is:
97
+
98
+ ```
99
+ <package-name> <package-version> <package-arch>
100
+ ```
101
+
102
+ Example:
103
+
104
+ ```bash
105
+ cat <<EOF | debsbom generate --from-pkglist -t cdx -t spdx -o sbom
106
+ cpp 4:15.2.0-4 amd64
107
+ htop 3.4.1-5 amd64
108
+ EOF
109
+ ```
110
+
111
+ ## Limitations
112
+
113
+ ### License Information
114
+
115
+ License information in Debian is stored in `/usr/share/doc/**/copyright`. The format of these files is not required to be machine-interpretable. For most packages this is the case and they are machine-readable, but there are some cases where the exact license determination is hard.
116
+ To prevent any false license information from being included in the SBOM, it is not emitted for now.
117
+
118
+ ### Vendor Packages
119
+
120
+ Vendor packages are currently not identified. Identifying them is important to emit the correct PURL. Right now we make no distinction between vendor and official packages. That means we emit potentially incorrect PURLs for vendor packages.
121
+
122
+ Reliably and correctly identifying if a package is a vendor package or not is non-trivial without access to the internet. For this reason we do not attempt it. If you have vendor packages in your distribution we assume you know them, and if not you can identify them in postprocessing. A simple way is to use `debsbom download` and look for any packages that failed to download, or whose checksums do not match.
@@ -0,0 +1,84 @@
1
+ [build-system]
2
+ requires = ["setuptools"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "debsbom"
7
+ version = "0.1.0"
8
+ dependencies = [
9
+ "cyclonedx-python-lib>=11.0.0",
10
+ "packageurl-python>=0.16.0",
11
+ "spdx-tools>=0.8.3",
12
+ "python-debian>=0.1.49",
13
+ ]
14
+ requires-python = ">=3.11"
15
+ authors = [
16
+ { name="Christoph Steiger", email="christoph.steiger@siemens.com" },
17
+ ]
18
+ maintainers = [
19
+ { name="Christoph Steiger", email="christoph.steiger@siemens.com" },
20
+ { name="Gernot Hillier", email="gernot.hillier@siemens.com" },
21
+ ]
22
+ description = "Generate SBOMs for Debian-based distributions."
23
+ readme = "README.md"
24
+ license = "MIT"
25
+ classifiers = [
26
+ "Intended Audience :: Developers",
27
+ "Operating System :: POSIX :: Linux",
28
+ "Programming Language :: Python :: 3",
29
+ "Topic :: System :: Operating System Kernels :: Linux" ,
30
+ "Topic :: Utilities",
31
+ ]
32
+ [project.urls]
33
+ Homepage = "https://github.com/siemens/debsbom"
34
+ Repository = "https://github.com/siemens/debsbom.git"
35
+
36
+ [project.optional-dependencies]
37
+ dev = [
38
+ "pytest>=6.0",
39
+ "pytest-cov>=6",
40
+ "black",
41
+ "beartype>=0.20",
42
+ "debsbom[download]",
43
+ "debsbom[doc]",
44
+ ]
45
+ download = [
46
+ "requests>=2.25.1",
47
+ "zstandard>=0.20",
48
+ ]
49
+ # only distributed in Debian (not pip).
50
+ # only needed to speedup apt parsing
51
+ apt = [
52
+ "python3-apt>=2.6.0",
53
+ ]
54
+
55
+ # dependencies to build documentation
56
+ doc = [
57
+ "sphinx>=7,<8",
58
+ "sphinx-rtd-theme>=2.0.0",
59
+ "sphinx-autodoc-typehints>=2.0.0",
60
+ "sphinx-argparse>=0.2.1",
61
+ ]
62
+
63
+ [project.scripts]
64
+ debsbom = "debsbom.cli:main"
65
+
66
+ [tool.black]
67
+ line-length = 100
68
+
69
+ [tool.pytest.ini_options]
70
+ minversion = "6.0"
71
+ testpaths = [
72
+ "tests",
73
+ ]
74
+ markers = [
75
+ "online: tests requiring internet access",
76
+ ]
77
+ filterwarnings = [
78
+ "error",
79
+ ]
80
+
81
+ [tool.coverage.run]
82
+ source = [
83
+ "debsbom"
84
+ ]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,14 @@
1
+ # Copyright (C) 2025 Siemens
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ # Optional transitive dependency of deb822 which is only distributed
6
+ # via Debian (not pip). If not available but requested, the library
7
+ # issues a warning which we want to avoid by checking upfront and
8
+ # explicitly requesting the fallback mechanism.
9
# Probe for python-apt once at import time; the resulting flag lets callers
# explicitly choose between the apt_pkg-backed parser and the fallback.
try:
    import apt
except ImportError:
    HAS_PYTHON_APT = False
else:
    HAS_PYTHON_APT = True
@@ -0,0 +1,189 @@
1
+ # Copyright (C) 2025 Siemens
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ from collections.abc import Callable, Iterable
6
+ from dataclasses import dataclass
7
+ from debian.deb822 import Deb822, Sources, Packages
8
+ from debian.debian_support import Version
9
+ import logging
10
+ from pathlib import Path
11
+
12
+ from ..util.compression import find_compressed_file_variants, stream_compressed_file
13
+ from ..dpkg.package import BinaryPackage, SourcePackage
14
+ from .. import HAS_PYTHON_APT
15
+
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ @dataclass
21
+ class ExtendedStates:
22
+ """
23
+ The apt extended states encode information if a package is manually
24
+ installed or installed via a dependency only.
25
+ """
26
+
27
+ auto_installed: set[tuple[str, str]]
28
+
29
+ def is_manual(self, name: str, arch: str) -> bool:
30
+ """True if package is explicitly installed"""
31
+ return (name, arch) not in self.auto_installed
32
+
33
+ @classmethod
34
+ def from_file(
35
+ cls, file: str | Path, filter_fn: Callable[[str, str], bool] | None = None
36
+ ) -> "ExtendedStates":
37
+ """Factory to create instance from the apt extended states file"""
38
+ auto_installed = set()
39
+ with open(Path(file)) as f:
40
+ for s in Deb822.iter_paragraphs(f, use_apt_pkg=HAS_PYTHON_APT):
41
+ name = s.get("Package")
42
+ arch = s.get("Architecture")
43
+ if s.get("Auto-Installed") != "1":
44
+ continue
45
+ if (filter_fn is None) or (filter_fn(name, arch)):
46
+ auto_installed.add((name, arch))
47
+
48
+ return cls(auto_installed=auto_installed)
49
+
50
+
51
+ @dataclass
52
+ class Repository:
53
+ """Represents a debian repository as cached by apt."""
54
+
55
+ in_release_file: Path
56
+ origin: str | None
57
+ codename: str | None
58
+ architectures: list[str]
59
+ components: list[str] | None = None
60
+ version: Version | None = None
61
+ description: str | None = None
62
+
63
+ @classmethod
64
+ def from_apt_cache(cls, lists_dir: str | Path) -> Iterable["Repository"]:
65
+ """Create repositories from apt lists directory."""
66
+ for entry in Path(lists_dir).iterdir():
67
+ if entry.name.endswith("_InRelease"):
68
+ with open(entry) as f:
69
+ repo = Deb822(f)
70
+ origin = repo.get("Origin")
71
+ codename = repo.get("Codename")
72
+ version = repo.get("Version")
73
+ architectures = repo.get("Architectures", "").split()
74
+ components = repo.get("Components")
75
+ description = repo.get("Description")
76
+ logger.info(f"Found apt lists cache repository: {entry}")
77
+ if not len(architectures):
78
+ logger.error(f"Repository does not specify 'Architectures', ignoring: {entry}")
79
+ continue
80
+ yield Repository(
81
+ in_release_file=entry,
82
+ origin=origin,
83
+ codename=codename,
84
+ version=Version(version) if version else None,
85
+ architectures=architectures,
86
+ components=components.split() if components else None,
87
+ description=description,
88
+ )
89
+
90
+ @classmethod
91
+ def _make_srcpkgs(
92
+ cls, sources: Iterable[Sources], filter_fn: Callable[[str], bool] | None = None
93
+ ) -> Iterable[SourcePackage]:
94
+ _sources = filter(lambda p: filter_fn(p["Package"]), sources) if filter_fn else sources
95
+ for source in _sources:
96
+ yield SourcePackage.from_dep822(source)
97
+
98
+ @classmethod
99
+ def _make_binpkgs(
100
+ cls, packages: Iterable[Packages], filter_fn: Callable[[str, str], bool] | None = None
101
+ ) -> Iterable[BinaryPackage]:
102
+ _pkgs = (
103
+ filter(lambda p: filter_fn(p["Package"], p["Architecture"]), packages)
104
+ if filter_fn
105
+ else packages
106
+ )
107
+ for pkg in _pkgs:
108
+ yield BinaryPackage.from_dep822(pkg)
109
+
110
+ @classmethod
111
+ def _parse_sources(
112
+ cls, sources_file: str, srcpkg_filter: Callable[[str], bool] | None = None
113
+ ) -> Iterable["SourcePackage"]:
114
+ sources_path = Path(sources_file)
115
+ try:
116
+ if sources_path.exists():
117
+ with open(sources_path) as f:
118
+ logger.debug(f"Parsing apt cache source packages: {sources_file}")
119
+ sources_raw = Packages.iter_paragraphs(f, use_apt_pkg=HAS_PYTHON_APT)
120
+ for s in Repository._make_srcpkgs(sources_raw, srcpkg_filter):
121
+ yield s
122
+ else:
123
+ compressed_variant = find_compressed_file_variants(sources_path)[0]
124
+ content = stream_compressed_file(compressed_variant)
125
+ logger.debug(f"Parsing apt cache source packages: {sources_file}")
126
+ # TODO: in python-debian >= 1.0.0 it is possible to directly
127
+ # pass the filename of a compressed file when using apt_pkg
128
+ sources_raw = Packages.iter_paragraphs(content, use_apt_pkg=False)
129
+ for s in Repository._make_srcpkgs(sources_raw, srcpkg_filter):
130
+ yield s
131
+ except (FileNotFoundError, IndexError, RuntimeError):
132
+ logger.debug(f"Missing apt cache sources: {sources_file}")
133
+
134
+ @classmethod
135
+ def _parse_packages(
136
+ cls, packages_file: str, binpkg_filter: Callable[[str, str], bool] | None = None
137
+ ) -> Iterable[BinaryPackage]:
138
+ packages_path = Path(packages_file)
139
+ try:
140
+ if packages_path.exists():
141
+ with open(packages_path) as f:
142
+ packages_raw = Packages.iter_paragraphs(f, use_apt_pkg=HAS_PYTHON_APT)
143
+ logger.debug(f"Parsing apt cache binary packages: {packages_file}")
144
+ for s in Repository._make_binpkgs(packages_raw, binpkg_filter):
145
+ yield s
146
+ else:
147
+ compressed_variant = find_compressed_file_variants(packages_path)[0]
148
+ content = stream_compressed_file(compressed_variant)
149
+ # TODO: in python-debian >= 1.0.0 it is possible to directly
150
+ # pass the filename of a compressed file when using apt_pkg
151
+ packages_raw = Packages.iter_paragraphs(content, use_apt_pkg=False)
152
+ logger.debug(f"Parsing apt cache binary packages: {packages_file}")
153
+ for s in Repository._make_binpkgs(packages_raw, binpkg_filter):
154
+ yield s
155
+ except (FileNotFoundError, IndexError, RuntimeError):
156
+ logger.debug(f"Missing apt cache packages: {packages_file}")
157
+
158
+ def sources(self, filter_fn: Callable[[str], bool] | None = None) -> Iterable[SourcePackage]:
159
+ """Get all source packages from this repository."""
160
+ repo_base = str(self.in_release_file).removesuffix("_InRelease")
161
+ if self.components:
162
+ for component in self.components:
163
+ sources_file = "_".join([repo_base, component, "source", "Sources"])
164
+ for s in self._parse_sources(sources_file, filter_fn):
165
+ yield s
166
+ else:
167
+ sources_file = "_".join([repo_base, "source", "Sources"])
168
+ return self._parse_sources(sources_file, filter_fn)
169
+
170
+ def binpackages(
171
+ self,
172
+ filter_fn: Callable[[str, str], bool] | None = None,
173
+ ext_states: ExtendedStates = ExtendedStates(set()),
174
+ ) -> Iterable[BinaryPackage]:
175
+ """Get all binary packages from this repository"""
176
+ repo_base = str(self.in_release_file).removesuffix("_InRelease")
177
+ if self.components:
178
+ for component in self.components:
179
+ for arch in self.architectures:
180
+ packages_file = "_".join([repo_base, component, f"binary-{arch}", "Packages"])
181
+ for p in self._parse_packages(packages_file, filter_fn):
182
+ p.manually_installed = ext_states.is_manual(p.name, p.architecture)
183
+ yield p
184
+ else:
185
+ for arch in self.architectures:
186
+ packages_file = "_".join([repo_base, f"binary-{arch}", "Packages"])
187
+ for p in self._parse_packages(packages_file, filter_fn):
188
+ p.manually_installed = ext_states.is_manual(p.name, p.architecture)
189
+ yield p