sbom-generator 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sbom_generator-1.1.0/PKG-INFO +209 -0
- sbom_generator-1.1.0/README.md +185 -0
- sbom_generator-1.1.0/pyproject.toml +38 -0
- sbom_generator-1.1.0/sbom_extractor/__init__.py +7 -0
- sbom_generator-1.1.0/sbom_extractor/cli.py +386 -0
- sbom_generator-1.1.0/sbom_extractor/compilation_db.py +102 -0
- sbom_generator-1.1.0/sbom_extractor/cpe.py +50 -0
- sbom_generator-1.1.0/sbom_extractor/cyclonedx_generator.py +247 -0
- sbom_generator-1.1.0/sbom_extractor/html_generator.py +1425 -0
- sbom_generator-1.1.0/sbom_extractor/manifest_parser.py +539 -0
- sbom_generator-1.1.0/sbom_extractor/ntia.py +57 -0
- sbom_generator-1.1.0/sbom_extractor/purl.py +42 -0
- sbom_generator-1.1.0/sbom_extractor/scanner.py +191 -0
- sbom_generator-1.1.0/sbom_extractor/spdx3_generator.py +355 -0
- sbom_generator-1.1.0/sbom_extractor/spdx_generator.py +306 -0
- sbom_generator-1.1.0/sbom_extractor/validator.py +86 -0
- sbom_generator-1.1.0/sbom_extractor/vcs.py +37 -0
- sbom_generator-1.1.0/sbom_generator.egg-info/PKG-INFO +209 -0
- sbom_generator-1.1.0/sbom_generator.egg-info/SOURCES.txt +24 -0
- sbom_generator-1.1.0/sbom_generator.egg-info/dependency_links.txt +1 -0
- sbom_generator-1.1.0/sbom_generator.egg-info/entry_points.txt +2 -0
- sbom_generator-1.1.0/sbom_generator.egg-info/requires.txt +1 -0
- sbom_generator-1.1.0/sbom_generator.egg-info/top_level.txt +1 -0
- sbom_generator-1.1.0/setup.cfg +4 -0
- sbom_generator-1.1.0/setup.py +3 -0
- sbom_generator-1.1.0/tests/test_parsers.py +477 -0
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sbom-generator
|
|
3
|
+
Version: 1.1.0
|
|
4
|
+
Summary: Extract SPDX and CycloneDX SBOMs from open-source projects, including the Linux kernel
|
|
5
|
+
Author-email: Fatih Tekin <fatih.tekin.de@googlemail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/DrFatihTekin/sbom-generator
|
|
8
|
+
Project-URL: Repository, https://github.com/DrFatihTekin/sbom-generator
|
|
9
|
+
Project-URL: Issues, https://github.com/DrFatihTekin/sbom-generator/issues
|
|
10
|
+
Keywords: sbom,spdx,cyclonedx,security,linux-kernel,compliance
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Topic :: Security
|
|
19
|
+
Classifier: Topic :: Software Development :: Build Tools
|
|
20
|
+
Classifier: Intended Audience :: Developers
|
|
21
|
+
Requires-Python: >=3.9
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
Requires-Dist: rich>=13.0
|
|
24
|
+
|
|
25
|
+
# OpenSBOM Extractor
|
|
26
|
+
|
|
27
|
+
[![CI](https://github.com/DrFatihTekin/sbom-generator/actions/workflows/ci.yml/badge.svg)](https://github.com/DrFatihTekin/sbom-generator/actions/workflows/ci.yml)
|
|
28
|
+
|
|
29
|
+
A production-ready Python CLI for extracting Software Bill of Materials (SBOM) from open-source codebases. Built for scale — from small libraries to the Linux kernel (70k+ files).
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Key Features
|
|
34
|
+
|
|
35
|
+
- **Multi-ecosystem dependency extraction** — parses manifests and lock files for Python, Node.js, Rust, Go, and Java (Maven + Gradle). Lock files are preferred over manifests for exact pinned versions.
|
|
36
|
+
- **Parallel file scanning** — thread pool for hashing and license extraction with a live progress bar.
|
|
37
|
+
- **Streaming JSON output** — SPDX and CycloneDX documents are written one entry at a time; the full document is never held in memory, making 70k+ file projects practical.
|
|
38
|
+
- **Correct PURL generation** — all package references follow the [Package URL spec](https://github.com/package-url/purl-spec) (`pkg:pypi/…`, `pkg:maven/…`, etc.).
|
|
39
|
+
- **CPE identifiers** — best-effort CPE 2.3 strings generated for every dependency, enabling vulnerability matching against the NVD.
|
|
40
|
+
- **SPDX expression support** — correctly preserves compound identifiers like `GPL-2.0-only OR MIT` and `GPL-2.0-only WITH Linux-syscall-note`.
|
|
41
|
+
- **Git VCS metadata** — embeds commit, branch, tag, and remote URL into every SBOM format.
|
|
42
|
+
- **Reproducible output** — `--reproducible` produces bit-identical SBOMs across runs (fixed timestamp, deterministic UUID).
|
|
43
|
+
- **NTIA minimum elements check** — validates the 7 NTIA-required fields at runtime and reports any gaps.
|
|
44
|
+
- **SBOM structural validation** — validates generated SPDX 2.3 and CycloneDX 1.5 documents before writing.
|
|
45
|
+
- **Standards-compliant output** — SPDX 2.3, SPDX 3.0.1, and CycloneDX 1.5 JSON; plus an interactive HTML dashboard.
|
|
46
|
+
- **Precision C/C++ build tracing** — via Clang `compile_commands.json` or Linux kernel Kbuild `.cmd` files.
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Supported Languages
|
|
51
|
+
|
|
52
|
+
### License detection
|
|
53
|
+
|
|
54
|
+
SPDX license tags are extracted from any source file, including: C, C++, Python, JavaScript, TypeScript, Go, Rust, Java, Kotlin, Swift, C#, Shell, Perl, Ruby, PHP, Lua, Assembly, and common config formats (YAML, TOML, JSON, Makefile, Kconfig).
|
|
55
|
+
|
|
56
|
+
### Dependency extraction
|
|
57
|
+
|
|
58
|
+
| Ecosystem | Files parsed (lock file preferred) |
|
|
59
|
+
|---|---|
|
|
60
|
+
| Python | `poetry.lock` / `requirements.txt` / `requirements.in`, `pyproject.toml` |
|
|
61
|
+
| Node.js | `package-lock.json` / `package.json` |
|
|
62
|
+
| Rust | `Cargo.lock` / `Cargo.toml` |
|
|
63
|
+
| Go | `go.sum` / `go.mod` |
|
|
64
|
+
| Java (Maven) | `pom.xml` — including sub-modules and `<properties>` resolution |
|
|
65
|
+
| Java (Gradle) | `gradle.lockfile` / `build.gradle` / `build.gradle.kts` |
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## Installation
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install -e .
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
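Requires Python 3.9+. The only runtime dependency is [`rich`](https://github.com/Textualize/rich) for progress display. The command above performs an editable install from a source checkout; to install the published release instead, run `pip install sbom-generator`.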
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## Usage
|
|
80
|
+
|
|
81
|
+
### General directory scan
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
sbom-extractor /path/to/project -o my-project-sbom
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### With supplier name (required for NTIA compliance)
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
sbom-extractor /path/to/project --supplier "Acme Corp" -o my-project-sbom
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Reproducible output (for SBOM diffing in CI)
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
sbom-extractor /path/to/project --reproducible -o my-project-sbom
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### C/C++ project with a Clang compilation database
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
sbom-extractor /path/to/project \
|
|
103
|
+
--compile-commands /path/to/project/compile_commands.json \
|
|
104
|
+
-o project-sbom
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Generate `compile_commands.json` with CMake (`-DCMAKE_EXPORT_COMPILE_COMMANDS=ON`) or [Bear](https://github.com/rizsotto/Bear).
|
|
108
|
+
|
|
109
|
+
### Linux kernel (compile_commands.json)
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
cd /path/to/linux
|
|
113
|
+
make defconfig && make -j$(nproc)
|
|
114
|
+
python3 scripts/clang-tools/gen_compile_commands.py
|
|
115
|
+
|
|
116
|
+
sbom-extractor /path/to/linux \
|
|
117
|
+
--compile-commands /path/to/linux/compile_commands.json \
|
|
118
|
+
--no-hashes \
|
|
119
|
+
-o linux-sbom
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Linux kernel (Kbuild .cmd files)
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
sbom-extractor /path/to/linux \
|
|
126
|
+
--kernel-build /path/to/linux/build-output \
|
|
127
|
+
--no-hashes \
|
|
128
|
+
-o linux-sbom
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
`--no-hashes` is recommended for kernel-scale projects to skip SHA-256/SHA-1 computation.
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## CLI Options
|
|
136
|
+
|
|
137
|
+
```
|
|
138
|
+
positional arguments:
|
|
139
|
+
path Path to the project directory to scan
|
|
140
|
+
|
|
141
|
+
options:
|
|
142
|
+
-h, --help Show this help message and exit
|
|
143
|
+
-o, --output OUTPUT Base filename for output files (default: sbom)
|
|
144
|
+
--format {spdx,spdx3,cyclonedx,html,all}
|
|
145
|
+
Output format (default: all)
|
|
146
|
+
--project-name NAME Project name (default: directory name)
|
|
147
|
+
--project-version VERSION Project version (default: 1.0.0)
|
|
148
|
+
--supplier NAME Supplier / organization name — required for NTIA compliance
|
|
149
|
+
--no-hashes Skip SHA-256/SHA-1 hashing (faster for large projects)
|
|
150
|
+
--reproducible Deterministic output: fixed timestamp, UUID derived from
|
|
151
|
+
project name/version — useful for SBOM diffing in CI
|
|
152
|
+
--compile-commands PATH Path to compile_commands.json
|
|
153
|
+
--kernel-build PATH Path to kernel build directory (Kbuild .cmd files)
|
|
154
|
+
--exclude DIR Exclude a directory name from scanning (repeatable)
|
|
155
|
+
--workers N Number of parallel worker threads (default: 2 × CPU count)
|
|
156
|
+
-q, --quiet Suppress all progress output
|
|
157
|
+
-v, --verbose Show extra detail (full license list, validation results)
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
---
|
|
161
|
+
|
|
162
|
+
## Output Files
|
|
163
|
+
|
|
164
|
+
| File | Format | Notes |
|
|
165
|
+
|---|---|---|
|
|
166
|
+
| `sbom.spdx.json` | SPDX 2.3 | Stream-written; validated before save |
|
|
167
|
+
| `sbom.spdx3.json` | SPDX 3.0.1 | JSON-LD graph format |
|
|
168
|
+
| `sbom.cdx.json` | CycloneDX 1.5 | Stream-written; validated before save; includes CPE |
|
|
169
|
+
| `sbom.html` | Interactive HTML | Dark-mode dashboard; file list capped at 5,000 for browser performance |
|
|
170
|
+
|
|
171
|
+
Use `--format spdx`, `--format cyclonedx`, etc. to generate only what you need.
|
|
172
|
+
|
|
173
|
+
---
|
|
174
|
+
|
|
175
|
+
## NTIA Compliance
|
|
176
|
+
|
|
177
|
+
The tool checks the [NTIA minimum elements](https://www.ntia.gov/report/2021/minimum-elements-software-bill-materials) at runtime:
|
|
178
|
+
|
|
179
|
+
| Element | How it's satisfied |
|
|
180
|
+
|---|---|
|
|
181
|
+
| Supplier name | `--supplier` flag |
|
|
182
|
+
| Component name | `--project-name` (or directory name) |
|
|
183
|
+
| Component version | `--project-version` |
|
|
184
|
+
| Unique identifiers | PURL + CPE generated for every dependency |
|
|
185
|
+
| Dependency relationships | `CONTAINS` / `DEPENDS_ON` relationships in all formats |
|
|
186
|
+
| Author of SBOM data | Tool name + version in `creationInfo` |
|
|
187
|
+
| Timestamp | UTC timestamp at generation time (or epoch with `--reproducible`) |
|
|
188
|
+
|
|
189
|
+
Any missing elements are reported as warnings at the end of every run.
|
|
190
|
+
|
|
191
|
+
---
|
|
192
|
+
|
|
193
|
+
## Architecture
|
|
194
|
+
|
|
195
|
+
| Module | Responsibility |
|
|
196
|
+
|---|---|
|
|
197
|
+
| `cli.py` | Entry point — argument parsing, progress display, orchestration |
|
|
198
|
+
| `scanner.py` | Parallel directory walk, license extraction, file hashing |
|
|
199
|
+
| `manifest_parser.py` | Manifest and lock file parsing for all supported ecosystems |
|
|
200
|
+
| `compilation_db.py` | Clang `compile_commands.json` and Kbuild `.cmd` parsing |
|
|
201
|
+
| `purl.py` | Canonical PURL generation |
|
|
202
|
+
| `cpe.py` | Best-effort CPE 2.3 generation |
|
|
203
|
+
| `vcs.py` | Git metadata extraction |
|
|
204
|
+
| `ntia.py` | NTIA minimum elements compliance check |
|
|
205
|
+
| `validator.py` | Structural validation for SPDX 2.3 and CycloneDX 1.5 |
|
|
206
|
+
| `spdx_generator.py` | SPDX 2.3 JSON output (in-memory + streaming) |
|
|
207
|
+
| `spdx3_generator.py` | SPDX 3.0.1 JSON-LD output |
|
|
208
|
+
| `cyclonedx_generator.py` | CycloneDX 1.5 JSON output (in-memory + streaming) |
|
|
209
|
+
| `html_generator.py` | Self-contained interactive HTML report |
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# OpenSBOM Extractor
|
|
2
|
+
|
|
3
|
+
[![CI](https://github.com/DrFatihTekin/sbom-generator/actions/workflows/ci.yml/badge.svg)](https://github.com/DrFatihTekin/sbom-generator/actions/workflows/ci.yml)
|
|
4
|
+
|
|
5
|
+
A production-ready Python CLI for extracting Software Bill of Materials (SBOM) from open-source codebases. Built for scale — from small libraries to the Linux kernel (70k+ files).
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Key Features
|
|
10
|
+
|
|
11
|
+
- **Multi-ecosystem dependency extraction** — parses manifests and lock files for Python, Node.js, Rust, Go, and Java (Maven + Gradle). Lock files are preferred over manifests for exact pinned versions.
|
|
12
|
+
- **Parallel file scanning** — thread pool for hashing and license extraction with a live progress bar.
|
|
13
|
+
- **Streaming JSON output** — SPDX and CycloneDX documents are written one entry at a time; the full document is never held in memory, making 70k+ file projects practical.
|
|
14
|
+
- **Correct PURL generation** — all package references follow the [Package URL spec](https://github.com/package-url/purl-spec) (`pkg:pypi/…`, `pkg:maven/…`, etc.).
|
|
15
|
+
- **CPE identifiers** — best-effort CPE 2.3 strings generated for every dependency, enabling vulnerability matching against the NVD.
|
|
16
|
+
- **SPDX expression support** — correctly preserves compound identifiers like `GPL-2.0-only OR MIT` and `GPL-2.0-only WITH Linux-syscall-note`.
|
|
17
|
+
- **Git VCS metadata** — embeds commit, branch, tag, and remote URL into every SBOM format.
|
|
18
|
+
- **Reproducible output** — `--reproducible` produces bit-identical SBOMs across runs (fixed timestamp, deterministic UUID).
|
|
19
|
+
- **NTIA minimum elements check** — validates the 7 NTIA-required fields at runtime and reports any gaps.
|
|
20
|
+
- **SBOM structural validation** — validates generated SPDX 2.3 and CycloneDX 1.5 documents before writing.
|
|
21
|
+
- **Standards-compliant output** — SPDX 2.3, SPDX 3.0.1, and CycloneDX 1.5 JSON; plus an interactive HTML dashboard.
|
|
22
|
+
- **Precision C/C++ build tracing** — via Clang `compile_commands.json` or Linux kernel Kbuild `.cmd` files.
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Supported Languages
|
|
27
|
+
|
|
28
|
+
### License detection
|
|
29
|
+
|
|
30
|
+
SPDX license tags are extracted from any source file, including: C, C++, Python, JavaScript, TypeScript, Go, Rust, Java, Kotlin, Swift, C#, Shell, Perl, Ruby, PHP, Lua, Assembly, and common config formats (YAML, TOML, JSON, Makefile, Kconfig).
|
|
31
|
+
|
|
32
|
+
### Dependency extraction
|
|
33
|
+
|
|
34
|
+
| Ecosystem | Files parsed (lock file preferred) |
|
|
35
|
+
|---|---|
|
|
36
|
+
| Python | `poetry.lock` / `requirements.txt` / `requirements.in`, `pyproject.toml` |
|
|
37
|
+
| Node.js | `package-lock.json` / `package.json` |
|
|
38
|
+
| Rust | `Cargo.lock` / `Cargo.toml` |
|
|
39
|
+
| Go | `go.sum` / `go.mod` |
|
|
40
|
+
| Java (Maven) | `pom.xml` — including sub-modules and `<properties>` resolution |
|
|
41
|
+
| Java (Gradle) | `gradle.lockfile` / `build.gradle` / `build.gradle.kts` |
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
## Installation
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install -e .
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
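Requires Python 3.9+. The only runtime dependency is [`rich`](https://github.com/Textualize/rich) for progress display. The command above performs an editable install from a source checkout; to install the published release instead, run `pip install sbom-generator`.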
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## Usage
|
|
56
|
+
|
|
57
|
+
### General directory scan
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
sbom-extractor /path/to/project -o my-project-sbom
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### With supplier name (required for NTIA compliance)
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
sbom-extractor /path/to/project --supplier "Acme Corp" -o my-project-sbom
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Reproducible output (for SBOM diffing in CI)
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
sbom-extractor /path/to/project --reproducible -o my-project-sbom
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### C/C++ project with a Clang compilation database
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
sbom-extractor /path/to/project \
|
|
79
|
+
--compile-commands /path/to/project/compile_commands.json \
|
|
80
|
+
-o project-sbom
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Generate `compile_commands.json` with CMake (`-DCMAKE_EXPORT_COMPILE_COMMANDS=ON`) or [Bear](https://github.com/rizsotto/Bear).
|
|
84
|
+
|
|
85
|
+
### Linux kernel (compile_commands.json)
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
cd /path/to/linux
|
|
89
|
+
make defconfig && make -j$(nproc)
|
|
90
|
+
python3 scripts/clang-tools/gen_compile_commands.py
|
|
91
|
+
|
|
92
|
+
sbom-extractor /path/to/linux \
|
|
93
|
+
--compile-commands /path/to/linux/compile_commands.json \
|
|
94
|
+
--no-hashes \
|
|
95
|
+
-o linux-sbom
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Linux kernel (Kbuild .cmd files)
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
sbom-extractor /path/to/linux \
|
|
102
|
+
--kernel-build /path/to/linux/build-output \
|
|
103
|
+
--no-hashes \
|
|
104
|
+
-o linux-sbom
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
`--no-hashes` is recommended for kernel-scale projects to skip SHA-256/SHA-1 computation.
|
|
108
|
+
|
|
109
|
+
---
|
|
110
|
+
|
|
111
|
+
## CLI Options
|
|
112
|
+
|
|
113
|
+
```
|
|
114
|
+
positional arguments:
|
|
115
|
+
path Path to the project directory to scan
|
|
116
|
+
|
|
117
|
+
options:
|
|
118
|
+
-h, --help Show this help message and exit
|
|
119
|
+
-o, --output OUTPUT Base filename for output files (default: sbom)
|
|
120
|
+
--format {spdx,spdx3,cyclonedx,html,all}
|
|
121
|
+
Output format (default: all)
|
|
122
|
+
--project-name NAME Project name (default: directory name)
|
|
123
|
+
--project-version VERSION Project version (default: 1.0.0)
|
|
124
|
+
--supplier NAME Supplier / organization name — required for NTIA compliance
|
|
125
|
+
--no-hashes Skip SHA-256/SHA-1 hashing (faster for large projects)
|
|
126
|
+
--reproducible Deterministic output: fixed timestamp, UUID derived from
|
|
127
|
+
project name/version — useful for SBOM diffing in CI
|
|
128
|
+
--compile-commands PATH Path to compile_commands.json
|
|
129
|
+
--kernel-build PATH Path to kernel build directory (Kbuild .cmd files)
|
|
130
|
+
--exclude DIR Exclude a directory name from scanning (repeatable)
|
|
131
|
+
--workers N Number of parallel worker threads (default: 2 × CPU count)
|
|
132
|
+
-q, --quiet Suppress all progress output
|
|
133
|
+
-v, --verbose Show extra detail (full license list, validation results)
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## Output Files
|
|
139
|
+
|
|
140
|
+
| File | Format | Notes |
|
|
141
|
+
|---|---|---|
|
|
142
|
+
| `sbom.spdx.json` | SPDX 2.3 | Stream-written; validated before save |
|
|
143
|
+
| `sbom.spdx3.json` | SPDX 3.0.1 | JSON-LD graph format |
|
|
144
|
+
| `sbom.cdx.json` | CycloneDX 1.5 | Stream-written; validated before save; includes CPE |
|
|
145
|
+
| `sbom.html` | Interactive HTML | Dark-mode dashboard; file list capped at 5,000 for browser performance |
|
|
146
|
+
|
|
147
|
+
Use `--format spdx`, `--format cyclonedx`, etc. to generate only what you need.
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
## NTIA Compliance
|
|
152
|
+
|
|
153
|
+
The tool checks the [NTIA minimum elements](https://www.ntia.gov/report/2021/minimum-elements-software-bill-materials) at runtime:
|
|
154
|
+
|
|
155
|
+
| Element | How it's satisfied |
|
|
156
|
+
|---|---|
|
|
157
|
+
| Supplier name | `--supplier` flag |
|
|
158
|
+
| Component name | `--project-name` (or directory name) |
|
|
159
|
+
| Component version | `--project-version` |
|
|
160
|
+
| Unique identifiers | PURL + CPE generated for every dependency |
|
|
161
|
+
| Dependency relationships | `CONTAINS` / `DEPENDS_ON` relationships in all formats |
|
|
162
|
+
| Author of SBOM data | Tool name + version in `creationInfo` |
|
|
163
|
+
| Timestamp | UTC timestamp at generation time (or epoch with `--reproducible`) |
|
|
164
|
+
|
|
165
|
+
Any missing elements are reported as warnings at the end of every run.
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## Architecture
|
|
170
|
+
|
|
171
|
+
| Module | Responsibility |
|
|
172
|
+
|---|---|
|
|
173
|
+
| `cli.py` | Entry point — argument parsing, progress display, orchestration |
|
|
174
|
+
| `scanner.py` | Parallel directory walk, license extraction, file hashing |
|
|
175
|
+
| `manifest_parser.py` | Manifest and lock file parsing for all supported ecosystems |
|
|
176
|
+
| `compilation_db.py` | Clang `compile_commands.json` and Kbuild `.cmd` parsing |
|
|
177
|
+
| `purl.py` | Canonical PURL generation |
|
|
178
|
+
| `cpe.py` | Best-effort CPE 2.3 generation |
|
|
179
|
+
| `vcs.py` | Git metadata extraction |
|
|
180
|
+
| `ntia.py` | NTIA minimum elements compliance check |
|
|
181
|
+
| `validator.py` | Structural validation for SPDX 2.3 and CycloneDX 1.5 |
|
|
182
|
+
| `spdx_generator.py` | SPDX 2.3 JSON output (in-memory + streaming) |
|
|
183
|
+
| `spdx3_generator.py` | SPDX 3.0.1 JSON-LD output |
|
|
184
|
+
| `cyclonedx_generator.py` | CycloneDX 1.5 JSON output (in-memory + streaming) |
|
|
185
|
+
| `html_generator.py` | Self-contained interactive HTML report |
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=77.0.3", "wheel"]  # SPDX `license = "MIT"` string (PEP 639) needs setuptools>=77
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "sbom-generator"
|
|
7
|
+
version = "1.1.0"
|
|
8
|
+
description = "Extract SPDX and CycloneDX SBOMs from open-source projects, including the Linux kernel"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
authors = [{name = "Fatih Tekin", email = "fatih.tekin.de@googlemail.com"}]
|
|
13
|
+
keywords = ["sbom", "spdx", "cyclonedx", "security", "linux-kernel", "compliance"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3.9",
|
|
18
|
+
"Programming Language :: Python :: 3.10",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
"Programming Language :: Python :: 3.12",
|
|
21
|
+
"Operating System :: OS Independent",
|
|
22
|
+
"Topic :: Security",
|
|
23
|
+
"Topic :: Software Development :: Build Tools",
|
|
24
|
+
"Intended Audience :: Developers",
|
|
25
|
+
]
|
|
26
|
+
dependencies = ["rich>=13.0"]
|
|
27
|
+
|
|
28
|
+
[project.urls]
|
|
29
|
+
Homepage = "https://github.com/DrFatihTekin/sbom-generator"
|
|
30
|
+
Repository = "https://github.com/DrFatihTekin/sbom-generator"
|
|
31
|
+
Issues = "https://github.com/DrFatihTekin/sbom-generator/issues"
|
|
32
|
+
|
|
33
|
+
[project.scripts]
|
|
34
|
+
sbom-extractor = "sbom_extractor.cli:main"
|
|
35
|
+
|
|
36
|
+
[tool.setuptools.packages.find]
|
|
37
|
+
where = ["."]
|
|
38
|
+
include = ["sbom_extractor*"]
|