@better-internet/oss-verify 0.1.0-draft
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +133 -0
- package/dist/cli.mjs +2 -0
- package/dist/spec/SPEC.md +329 -0
- package/dist/spec/ci-providers.json +95 -0
- package/dist/spec/contexts/v1/oss-verified.jsonld +37 -0
- package/dist/spec/models.json +82 -0
- package/dist/spec/schemas/predicate.schema.json +138 -0
- package/dist/src/checks/blobs.js +112 -0
- package/dist/src/checks/llm-audit.js +207 -0
- package/dist/src/checks/osi-license.js +115 -0
- package/dist/src/checks/reuse.js +78 -0
- package/dist/src/checks/sbom/cargo.js +124 -0
- package/dist/src/checks/sbom/go.js +137 -0
- package/dist/src/checks/sbom/javascript.js +125 -0
- package/dist/src/checks/sbom/python.js +240 -0
- package/dist/src/checks/sbom/types.js +10 -0
- package/dist/src/checks/sbom.js +173 -0
- package/dist/src/cli.mjs +225 -0
- package/dist/src/git.js +27 -0
- package/dist/src/hash.js +2 -0
- package/dist/src/predicate.js +35 -0
- package/dist/src/types.js +2 -0
- package/package.json +56 -0
- package/spec/SPEC.md +329 -0
- package/spec/ci-providers.json +95 -0
- package/spec/contexts/v1/oss-verified.jsonld +37 -0
- package/spec/models.json +82 -0
- package/spec/schemas/predicate.schema.json +138 -0
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
// Python ecosystem detector.
|
|
2
|
+
//
|
|
3
|
+
// Lockfile precedence (first match wins; we don't merge across managers):
|
|
4
|
+
// 1. uv.lock — TOML, modern uv format
|
|
5
|
+
// 2. poetry.lock — TOML, Poetry format
|
|
6
|
+
// 3. Pipfile.lock — JSON, Pipenv
|
|
7
|
+
// 4. requirements.txt — line-oriented, vanilla pip
|
|
8
|
+
//
|
|
9
|
+
// License lookup via PyPI's JSON API:
|
|
10
|
+
// GET https://pypi.org/pypi/<name>/<version>/json
|
|
11
|
+
// -> { info: { license: <string>, classifiers: [...] } }
|
|
12
|
+
//
|
|
13
|
+
// Many packages declare `info.license` as free-form ("MIT", "MIT License",
|
|
14
|
+
// "BSD-3-Clause" or worse, "see LICENSE file"). The Trove classifiers
|
|
15
|
+
// `License :: OSI Approved :: <name>` are far more reliable; we prefer
|
|
16
|
+
// them and fall back to the free-form field.
|
|
17
|
+
//
|
|
18
|
+
// Concurrency-limited + memoised, same pattern as cargo + go detectors.
|
|
19
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
20
|
+
import { join } from "node:path";
|
|
21
|
+
// Base URL of PyPI's JSON API (see fetchPypiLicense below).
const PYPI = "https://pypi.org/pypi";
// Max simultaneous PyPI requests per detect() run.
const PYPI_CONCURRENCY = 4;
// Identifies this tool to PyPI, per their API etiquette.
const USER_AGENT = "oss-verify/0.1 (https://github.com/better-internet-org/oss-verify)";
// SPDX identifiers for the classifiers PyPI lists. Sparse list — covers the
// common cases; falls through to free-form `info.license` parsing otherwise.
const CLASSIFIER_TO_SPDX = {
    "License :: OSI Approved :: MIT License": "MIT",
    // NOTE(review): the "BSD License" classifier doesn't distinguish 2- vs
    // 3-clause; 3-clause is assumed here — confirm that's acceptable.
    "License :: OSI Approved :: BSD License": "BSD-3-Clause",
    "License :: OSI Approved :: Apache Software License": "Apache-2.0",
    "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)": "MPL-2.0",
    "License :: OSI Approved :: Mozilla Public License 1.1 (MPL 1.1)": "MPL-1.1",
    "License :: OSI Approved :: GNU General Public License v2 (GPLv2)": "GPL-2.0-only",
    "License :: OSI Approved :: GNU General Public License v3 (GPLv3)": "GPL-3.0-only",
    "License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)": "GPL-2.0-or-later",
    "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)": "GPL-3.0-or-later",
    "License :: OSI Approved :: GNU Lesser General Public License v2 (LGPLv2)": "LGPL-2.0-only",
    "License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)": "LGPL-3.0-only",
    "License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)": "LGPL-2.0-or-later",
    "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)": "LGPL-3.0-or-later",
    "License :: OSI Approved :: ISC License (ISCL)": "ISC",
    "License :: OSI Approved :: Python Software Foundation License": "PSF-2.0",
    "License :: OSI Approved :: GNU Affero General Public License v3": "AGPL-3.0-only",
    "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)": "AGPL-3.0-or-later",
    // NOTE(review): the Zope/Common Public License classifiers are unversioned;
    // ZPL-2.1 / CPL-1.0 are assumed — confirm these are the intended pins.
    "License :: OSI Approved :: Zope Public License": "ZPL-2.1",
    "License :: OSI Approved :: Common Public License": "CPL-1.0",
    "License :: OSI Approved :: Eclipse Public License 1.0 (EPL-1.0)": "EPL-1.0",
    "License :: OSI Approved :: Eclipse Public License 2.0 (EPL-2.0)": "EPL-2.0",
};
|
|
49
|
+
/**
 * Python ecosystem detector entry point.
 *
 * Picks the highest-precedence lockfile at `ctx.repoRoot`, parses it into
 * pinned (name, version) pairs, then resolves each pair's license against
 * PyPI with bounded concurrency.
 *
 * @param {{repoRoot: string}} ctx - check context (only repoRoot is read here).
 * @returns `null` when the repo shows no Python markers at all; otherwise a
 *   DetectorResult `{ ecosystem, components, missing, details? }`. Parse
 *   failures and missing lockfiles surface as `missing` entries (fail
 *   closed), never as silent passes.
 */
export async function detect(ctx) {
    const detected = pickLockfile(ctx.repoRoot);
    if (!detected) {
        // We saw a marker file (pyproject.toml, etc.) but no parseable lockfile.
        // Fall back to "not implemented for this layout" rather than silently passing.
        if (anyPythonMarker(ctx.repoRoot)) {
            return {
                ecosystem: "python",
                components: [],
                missing: [
                    "Python project detected but no supported lockfile found (uv.lock, poetry.lock, Pipfile.lock, requirements.txt). Run your package manager to materialise one before re-running.",
                ],
            };
        }
        return null;
    }
    let pairs;
    try {
        pairs = detected.parser(readFileSync(detected.path, "utf8"));
    }
    catch (e) {
        // Unreadable/garbled lockfile: report it as unresolved, don't crash.
        return {
            ecosystem: "python",
            components: [],
            missing: [`${detected.kind} parse failed: ${e.message}`],
        };
    }
    if (pairs.length === 0) {
        return {
            ecosystem: "python",
            components: [],
            missing: [],
            details: `${detected.kind}: no deps`,
        };
    }
    const components = [];
    const missing = [];
    // Shared work queue drained by PYPI_CONCURRENCY async workers; each
    // worker pulls the next pair until the queue is empty.
    const queue = [...pairs];
    const workers = Array.from({ length: PYPI_CONCURRENCY }, async () => {
        while (queue.length > 0) {
            const p = queue.shift();
            if (!p)
                break;
            const license = await fetchPypiLicense(p.name, p.version);
            if (license === undefined) {
                missing.push(`${p.name}@${p.version}`);
                continue;
            }
            components.push({
                name: p.name,
                version: p.version,
                license,
                purl: `pkg:pypi/${p.name.toLowerCase()}@${p.version}`,
            });
        }
    });
    await Promise.all(workers);
    // Deterministic output order: by name, then version. The previous
    // comparator returned 1 for equal (name, version) pairs, which violates
    // the sort-comparator contract (must return 0 on equality); returning 0
    // keeps the comparison a valid total order.
    components.sort((a, b) => {
        if (a.name !== b.name)
            return a.name < b.name ? -1 : 1;
        if (a.version !== b.version)
            return a.version < b.version ? -1 : 1;
        return 0;
    });
    return { ecosystem: "python", components, missing };
}
|
|
109
|
+
/**
 * Locate the highest-precedence supported lockfile under `root`.
 * Precedence (first hit wins, no merging across managers): uv.lock,
 * poetry.lock, Pipfile.lock, requirements.txt.
 *
 * @returns `{ kind, path, parser }` for the winning lockfile, or `null`
 *   when none of the candidates exist.
 */
function pickLockfile(root) {
    const candidates = [
        ["uv.lock", parseTomlLockPackages],
        ["poetry.lock", parseTomlLockPackages],
        ["Pipfile.lock", parsePipfileLock],
        ["requirements.txt", parseRequirementsTxt],
    ];
    for (const [kind, parser] of candidates) {
        const path = join(root, kind);
        if (!existsSync(path))
            continue;
        return { kind, path, parser };
    }
    return null;
}
|
|
123
|
+
/**
 * True when `root` contains any common Python project marker file
 * (pyproject.toml, setup.py, Pipfile, requirements.txt).
 */
function anyPythonMarker(root) {
    const markers = ["pyproject.toml", "setup.py", "Pipfile", "requirements.txt"];
    return markers.some((marker) => existsSync(join(root, marker)));
}
|
|
130
|
+
/**
 * Minimal TOML lock-package parser shared by uv.lock + poetry.lock. Both
 * formats use the same `[[package]] / name = "..." / version = "..."` table
 * shape; the files are machine-generated and stable, so regex extraction
 * over `[[package]]` blocks suffices and no TOML library is needed.
 *
 * Poetry entries marked `category = "dev"` are excluded. uv.lock has no
 * category field as of writing; its dev deps are conservatively included
 * (same tradeoff as the cargo detector).
 *
 * @returns deduplicated `{ name, version }` pairs in file order.
 */
function parseTomlLockPackages(text) {
    const out = [];
    const seen = new Set();
    // blocks[0] is the preamble before the first [[package]] table — skip it.
    const blocks = text.split(/^\[\[package\]\]\s*$/m);
    for (const block of blocks.slice(1)) {
        const name = matchField(block, "name");
        const version = matchField(block, "version");
        if (!name || !version)
            continue;
        if (matchField(block, "category") === "dev")
            continue;
        const key = `${name}@${version}`;
        if (!seen.has(key)) {
            seen.add(key);
            out.push({ name, version });
        }
    }
    return out;
}
/** First `field = "value"` line in a TOML block; null when absent. */
function matchField(block, field) {
    const hit = new RegExp(`^${field}\\s*=\\s*"([^"]+)"`, "m").exec(block);
    return hit === null ? null : hit[1];
}
|
|
166
|
+
/**
 * Parse Pipenv's Pipfile.lock (JSON). Only the `default` (runtime) section
 * is read; `develop` is ignored. Entry versions appear as `==1.2.3` — the
 * leading `==` is stripped; entries without a concrete version are dropped.
 *
 * @returns `{ name, version }` pairs in object-key order.
 */
function parsePipfileLock(text) {
    const data = JSON.parse(text);
    const out = [];
    for (const [name, entry] of Object.entries(data.default ?? {})) {
        const version = (entry.version ?? "").replace(/^==/, "");
        if (!version)
            continue;
        out.push({ name, version });
    }
    return out;
}
|
|
177
|
+
/**
 * Parse a pip `requirements.txt` into pinned `{ name, version }` pairs.
 *
 * Only exact pins (`name==version`) are accepted; range specifiers (`~=`,
 * `>=`, ...) aren't concrete without a lockfile and are skipped rather than
 * guessed at. Option lines (`-r`, `-e`, `--index-url`, ...) and comments
 * are ignored. Extras (`name[extra]==1.2.3`) are now accepted with the
 * extras suffix dropped — PyPI lookups key on the base distribution name.
 */
function parseRequirementsTxt(text) {
    const out = [];
    const seen = new Set();
    for (const raw of text.split("\n")) {
        // Strip inline comments, then whitespace (trim also eats the
        // trailing \r of CRLF files).
        const line = raw.split("#", 1)[0].trim();
        // All pip option lines start with `-`; the previous extra
        // `startsWith("--")` test was redundant (subsumed by `-`).
        if (!line || line.startsWith("-"))
            continue;
        // Accept `name==version`, optionally with an extras bracket between
        // name and `==` (extras don't change the distribution looked up).
        const m = line.match(/^([A-Za-z0-9_.\-]+)(?:\[[^\]]*\])?\s*==\s*([A-Za-z0-9_.\-+]+)/);
        if (!m)
            continue;
        const key = `${m[1]}@${m[2]}`;
        if (seen.has(key))
            continue;
        seen.add(key);
        out.push({ name: m[1], version: m[2] });
    }
    return out;
}
|
|
197
|
+
// Memoised lookups keyed by `lowercased-name@version`. A stored value of
// `undefined` is a negative-cache hit (lookup failed or nothing usable).
const licenseCache = new Map();
/**
 * Resolve a package's license via PyPI's JSON API
 * (GET https://pypi.org/pypi/<name>/<version>/json).
 *
 * Trove classifiers (`License :: OSI Approved :: ...`) are preferred; the
 * free-form `info.license` string is the fallback. Never throws — all
 * failures resolve to `undefined`.
 *
 * @returns an SPDX-ish license expression string, or `undefined`.
 */
async function fetchPypiLicense(name, version) {
    const key = `${name.toLowerCase()}@${version}`;
    if (licenseCache.has(key))
        return licenseCache.get(key);
    try {
        const url = `${PYPI}/${encodeURIComponent(name)}/${encodeURIComponent(version)}/json`;
        const res = await fetch(url, {
            headers: { accept: "application/json", "user-agent": USER_AGENT },
        });
        if (!res.ok) {
            licenseCache.set(key, undefined);
            return undefined;
        }
        const data = (await res.json());
        // Prefer Trove classifiers — more reliable + maps cleanly to SPDX.
        const classifiers = data.info?.classifiers ?? [];
        const spdxFromClassifiers = classifiers
            .map((c) => CLASSIFIER_TO_SPDX[c])
            .filter((v) => Boolean(v));
        if (spdxFromClassifiers.length > 0) {
            // Dedupe BEFORE deciding singular vs compound, so two classifiers
            // mapping to the same SPDX id yield `MIT`, not `(MIT)`.
            const unique = [...new Set(spdxFromClassifiers)];
            const expr = unique.length === 1 ? unique[0] : `(${unique.join(" OR ")})`;
            licenseCache.set(key, expr);
            return expr;
        }
        // Fall back to free-form `info.license`. Often this is a valid SPDX
        // expression already ("MIT", "Apache-2.0"); occasionally it's a
        // sentence ("see LICENSE"). Reject sentence-like values — double
        // spaces, a dot followed by whitespace or at the end, "see ..." —
        // but NOT interior dots: the previous bare `\.` wrongly rejected
        // valid identifiers like "Apache-2.0" and "MPL-2.0". The SPDX
        // parser downstream still rejects anything non-conformant (SPEC §3.3).
        const free = data.info?.license?.trim();
        if (free && free.length > 0 && !/\s{2,}|\.\s|\.$|see /i.test(free)) {
            licenseCache.set(key, free);
            return free;
        }
        licenseCache.set(key, undefined);
        return undefined;
    }
    catch {
        // Network/JSON failures are negative-cached so retries within one
        // run don't hammer PyPI.
        licenseCache.set(key, undefined);
        return undefined;
    }
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
// SBOM ecosystem detector contract.
//
// Each detector inspects the repo root for ecosystem-specific manifest files
// and returns either:
// - `null` if no ecosystem-specific manifests are present
// - a `DetectorResult` describing what was found + the component list
//
// The orchestrator (../sbom.ts) runs detectors in order and combines results.
// If multiple detectors match, the SBOM is multi-ecosystem.
// Type-only module: the empty export below marks this file as an ES module
// with no runtime payload, so the contract's types can be imported cleanly.
export {};
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
// SBOM dependency-license check (SPEC §3.3).
|
|
2
|
+
//
|
|
3
|
+
// Orchestrates per-ecosystem detectors:
|
|
4
|
+
// - sbom/javascript.ts — node_modules walk via Node resolution
|
|
5
|
+
// - sbom/cargo.ts — Cargo.lock + crates.io license lookup
|
|
6
|
+
// - sbom/go.ts — go.mod (detection-only stub)
|
|
7
|
+
// - sbom/python.ts — uv/poetry/Pipenv/requirements lockfile parsing + PyPI license lookup
8
|
+
//
|
|
9
|
+
// Per SPEC §3.3 the SBOM MUST cover direct + transitive runtime dependencies,
|
|
10
|
+
// every package MUST declare at least one OSI-approved license, and the
|
|
11
|
+
// output MUST be reproducible from (SHA, cli_version). The CycloneDX 1.5
|
|
12
|
+
// output here is canonical-JSON serialised (no serialNumber, sorted keys
|
|
13
|
+
// + components).
|
|
14
|
+
//
|
|
15
|
+
// Multi-ecosystem repos (e.g. JS + Rust in the same root) produce a single
|
|
16
|
+
// SBOM merging components from all detectors that matched. Repos with no
|
|
17
|
+
// detected ecosystem fail closed — silently passing would hide a real gap.
|
|
18
|
+
import parseSpdx from "spdx-expression-parse";
|
|
19
|
+
import { sha256Hex } from "../hash.js";
|
|
20
|
+
import { fetchOsiApprovedIds, leafIdentifiers } from "./osi-license.js";
|
|
21
|
+
import { detect as detectCargo } from "./sbom/cargo.js";
|
|
22
|
+
import { detect as detectGo } from "./sbom/go.js";
|
|
23
|
+
import { detect as detectJavascript } from "./sbom/javascript.js";
|
|
24
|
+
import { detect as detectPython } from "./sbom/python.js";
|
|
25
|
+
// Detectors run in this order; all that match contribute components.
const DETECTORS = [detectJavascript, detectCargo, detectGo, detectPython];
/**
 * SBOM dependency-license check (SPEC §3.3).
 *
 * Runs every ecosystem detector, merges their components into one
 * deterministic CycloneDX 1.5 BOM, hashes it, then verifies every
 * component's license expression resolves to OSI-approved identifiers.
 *
 * @returns `{ result: {pass, details}, sbomHash, sbomFormat, sbomUri }` —
 *   fails closed when no ecosystem is detected, when any entry is
 *   unresolved, or when any license is missing or non-OSI.
 */
export async function checkSbom(ctx) {
    const detections = [];
    for (const d of DETECTORS) {
        const r = await d(ctx);
        if (r)
            detections.push(r);
    }
    // Root-component metadata for the BOM; version is a fixed placeholder.
    const meta = { name: ctx.repoUrl, version: "0.0.0" };
    if (detections.length === 0) {
        // NOTE(review): this message predates the Go/Python detectors now
        // registered in DETECTORS — the wording looks stale; confirm.
        return fail("No supported ecosystem detected at the repo root. Supported today: JavaScript/Node " +
            "(package.json) and Cargo (Cargo.lock). Go and Python are tracked as follow-up work " +
            "and currently fail closed.", ctx, meta, []);
    }
    const allMissing = [];
    const allComponents = [];
    for (const det of detections) {
        allMissing.push(...det.missing);
        allComponents.push(...det.components);
    }
    // Sort by (purl) so output is deterministic across detector orderings.
    allComponents.sort((a, b) => (a.purl < b.purl ? -1 : a.purl > b.purl ? 1 : 0));
    // Hash the canonical serialisation so (SHA, cli_version) reproduces it.
    const sbom = buildCycloneDx(ctx, meta, allComponents);
    const sbomHash = sha256Hex(canonicalJson(sbom));
    if (allMissing.length > 0) {
        // Unresolved entries (lookup/parse failures) fail the check even if
        // every resolved component is clean.
        return {
            result: {
                pass: false,
                details: `${allMissing.length} unresolved entr${allMissing.length === 1 ? "y" : "ies"}: ${allMissing.slice(0, 5).join(" | ")}${allMissing.length > 5 ? `, +${allMissing.length - 5} more` : ""}`,
            },
            sbomHash,
            sbomFormat: "cyclonedx-1.5",
            sbomUri: null,
        };
    }
    if (allComponents.length === 0) {
        // A detected ecosystem with zero runtime deps is a legitimate pass.
        return {
            result: {
                pass: true,
                details: `${detections.map((d) => d.ecosystem).join("+")}: no runtime deps`,
            },
            sbomHash,
            sbomFormat: "cyclonedx-1.5",
            sbomUri: null,
        };
    }
    const osi = await fetchOsiApprovedIds();
    const violations = [];
    const noLicense = [];
    for (const c of allComponents) {
        if (!c.license) {
            noLicense.push(`${c.name}@${c.version}`);
            continue;
        }
        const verdict = checkLicenseExpression(c.license, osi.ids);
        if (!verdict.ok)
            violations.push(`${c.name}@${c.version}: ${verdict.reason}`);
    }
    if (noLicense.length > 0 || violations.length > 0) {
        const details = [
            noLicense.length > 0
                ? `${noLicense.length} packages declare no license: ${noLicense.slice(0, 5).join(", ")}${noLicense.length > 5 ? `, +${noLicense.length - 5} more` : ""}`
                : null,
            violations.length > 0
                ? `${violations.length} packages with non-OSI licenses:\n ${violations.slice(0, 10).join("\n ")}`
                : null,
        ]
            .filter(Boolean)
            .join("\n");
        return {
            result: { pass: false, details },
            sbomHash,
            sbomFormat: "cyclonedx-1.5",
            sbomUri: null,
        };
    }
    return {
        result: {
            pass: true,
            details: `${allComponents.length} runtime deps (${detections.map((d) => d.ecosystem).join("+")}), all OSI-approved`,
        },
        sbomHash,
        sbomFormat: "cyclonedx-1.5",
        sbomUri: null,
    };
}
|
|
111
|
+
/**
 * Validate one SPDX license expression against the OSI-approved id set.
 *
 * @param {string} expr - SPDX expression as declared by the package.
 * @param {Set<string>} osiIds - OSI-approved SPDX identifiers.
 * @returns `{ ok: true }` when every leaf identifier is OSI-approved,
 *   otherwise `{ ok: false, reason }` (unparseable expressions, empty
 *   expressions, and non-OSI leaves all fail).
 */
function checkLicenseExpression(expr, osiIds) {
    let parsed;
    try {
        parsed = parseSpdx(expr);
    }
    catch (e) {
        return {
            ok: false,
            reason: `'${expr}' is not a valid SPDX expression: ${e.message}`,
        };
    }
    const leaves = leafIdentifiers(parsed);
    if (leaves.length === 0)
        return { ok: false, reason: `no SPDX identifiers in '${expr}'` };
    const nonOsi = leaves.filter((id) => !osiIds.has(id));
    return nonOsi.length === 0
        ? { ok: true }
        : { ok: false, reason: `non-OSI leaves: ${nonOsi.join(", ")}` };
}
|
|
130
|
+
/**
 * Assemble a CycloneDX 1.5 BOM object. No serialNumber or timestamp is set,
 * keeping the structure reproducible; canonical serialisation + hashing
 * happen in the caller.
 *
 * @param ctx - check context (repoUrl is recorded as a vcs reference).
 * @param meta - `{ name, version }` of the root application component.
 * @param components - resolved dependency components.
 */
function buildCycloneDx(ctx, meta, components) {
    const componentEntries = components.map((c) => {
        const entry = {
            type: "library",
            name: c.name,
            version: c.version,
            purl: c.purl,
        };
        // Components with no resolvable license carry an empty licenses array.
        entry.licenses = c.license ? [{ expression: c.license }] : [];
        return entry;
    });
    return {
        bomFormat: "CycloneDX",
        specVersion: "1.5",
        version: 1,
        metadata: {
            component: {
                type: "application",
                name: meta.name,
                version: meta.version,
                purl: `pkg:generic/${encodeURIComponent(meta.name)}@${meta.version}`,
                externalReferences: [{ type: "vcs", url: ctx.repoUrl }],
            },
        },
        components: componentEntries,
    };
}
|
|
153
|
+
/**
 * Serialise `value` to JSON with object keys sorted recursively, so the
 * byte output is stable for hashing. Arrays keep their element order;
 * output is the compact (unindented) JSON.stringify form.
 */
function canonicalJson(value) {
    const sortKeys = (_key, node) => {
        if (node === null || typeof node !== "object" || Array.isArray(node))
            return node;
        return Object.fromEntries(Object.keys(node).sort().map((k) => [k, node[k]]));
    };
    return JSON.stringify(value, sortKeys);
}
|
|
165
|
+
/**
 * Build a failing SBOM check result. The (possibly empty) BOM is still
 * built and hashed so the evidence fields stay populated even on failure.
 */
function fail(details, ctx, meta, components) {
    const bom = buildCycloneDx(ctx, meta, components);
    const sbomHash = sha256Hex(canonicalJson(bom));
    return {
        result: { pass: false, details },
        sbomHash,
        sbomFormat: "cyclonedx-1.5",
        sbomUri: null,
    };
}
|
package/dist/src/cli.mjs
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { resolve } from "node:path";
|
|
3
|
+
import { checkNoProprietaryBlobs } from "./checks/blobs.js";
|
|
4
|
+
import { runLlmAudit } from "./checks/llm-audit.js";
|
|
5
|
+
import { checkOsiLicense } from "./checks/osi-license.js";
|
|
6
|
+
import { checkReuse } from "./checks/reuse.js";
|
|
7
|
+
import { checkSbom } from "./checks/sbom.js";
|
|
8
|
+
import { commitSha, defaultBranch, repoUrlFromRemote } from "./git.js";
|
|
9
|
+
import { buildPredicate } from "./predicate.js";
|
|
10
|
+
/**
 * Parse CLI flags from `argv` (the full process.argv; flags start at
 * index 2). `-h`/`--help` prints help and exits 0; an unknown flag prints
 * help and exits 2.
 *
 * @returns `{ repoRoot, output, skipSbom, reportJson, repoUrl? }`.
 */
function parseArgs(argv) {
    const args = {
        repoRoot: process.cwd(),
        output: "report",
        skipSbom: false,
        reportJson: false,
    };
    let i = 2;
    while (i < argv.length) {
        const flag = argv[i];
        switch (flag) {
            case "--repo":
                args.repoRoot = resolve(argv[++i] ?? ".");
                break;
            case "--repo-url":
                args.repoUrl = argv[++i];
                break;
            case "--output":
                args.output = argv[++i];
                break;
            case "--skip-sbom":
                args.skipSbom = true;
                break;
            case "--report-json":
                args.reportJson = true;
                break;
            case "--help":
            case "-h":
                printHelp();
                process.exit(0);
                break;
            default:
                console.error(`unknown flag: ${flag}`);
                printHelp();
                process.exit(2);
        }
        i++;
    }
    return args;
}
|
|
41
|
+
/** Print CLI usage/help text to stdout. */
function printHelp() {
    console.log(`oss-verify — deterministic OSS-compliance attestation CLI

Usage:
  oss-verify [options]

Options:
  --repo <path>       Repository root (default: cwd)
  --repo-url <url>    Override repo URL (default: derived from git remote)
  --output <mode>     'report' (human, default) | 'predicate' (in-toto JSON) | 'both'
  --skip-sbom         Pass the SBOM check (use only for non-JS projects until
                      other-ecosystem detectors ship)
  --report-json       INTERNAL/preview mode. Runs the full conformant
                      pipeline (deterministic + LLM) and emits a JSON status
                      report instead of a predicate; always exits 0 so the
                      caller can record fail states. NOT a substitute for
                      the conformant attestation flow — output is not a
                      valid predicate and MUST NOT be signed or published as
                      one. Used by the oss-verified watchlist for monthly
                      monitoring of tracked projects.
  -h, --help          Show this help

Required environment:
  ANTHROPIC_API_KEY   SPEC §7 LLM audit. The CLI exits 1 if missing —
                      per SPEC §4 the LLM step is mandatory and there
                      is no opt-out.

Exit codes:
  0  all checks pass; predicate would be signed in CI
  1  one or more checks fail; no predicate
  2  CLI invocation error
`);
}
|
|
74
|
+
/** Left-align `label` in a space-padded field of `width` characters. */
function pad(label, width = 22) {
    const deficit = width - label.length;
    return deficit > 0 ? label + " ".repeat(deficit) : label;
}
|
|
77
|
+
/** ANSI-coloured status marker: green check for pass, red cross for fail. */
function statusGlyph(pass) {
    if (pass)
        return "\x1b[32m✓\x1b[0m";
    return "\x1b[31m✗\x1b[0m";
}
|
|
80
|
+
/**
 * CLI entry point. Pipeline: deterministic checks → (optionally) JSON
 * status report → mandatory LLM audit (SPEC §7) → predicate emission.
 * Exit codes: 0 = all checks pass, 1 = a check/audit failed, 2 = CLI or
 * audit-infrastructure error.
 */
async function main() {
    const args = parseArgs(process.argv);
    const ctx = {
        repoRoot: args.repoRoot,
        commitSha: commitSha(args.repoRoot),
        repoUrl: args.repoUrl ?? repoUrlFromRemote(args.repoRoot),
    };
    const branch = defaultBranch(args.repoRoot);
    // Human-readable output goes to stderr throughout, so stdout stays
    // reserved for machine-readable JSON (predicate / report).
    if (args.output !== "predicate") {
        console.error(`oss-verify on ${ctx.repoUrl}@${ctx.commitSha.slice(0, 8)} (${branch})`);
        console.error("");
    }
    // Run deterministic checks
    const reuse = checkReuse(ctx);
    const { result: osi, osiResponseHash } = await checkOsiLicense(ctx);
    const blobs = checkNoProprietaryBlobs(ctx);
    const sbomRaw = await checkSbom(ctx);
    // --skip-sbom overrides only the pass/fail result; the hash/format
    // evidence from the real SBOM run is kept intact.
    const sbom = args.skipSbom
        ? { ...sbomRaw, result: { pass: true, details: "skipped via --skip-sbom" } }
        : sbomRaw;
    const criteria = {
        reuse,
        osi_license: osi,
        dependency_licenses: sbom.result,
        no_proprietary_blobs: blobs,
    };
    const deterministicPass = Object.values(criteria).every((c) => c.pass);
    if (args.output !== "predicate") {
        for (const [name, result] of Object.entries(criteria)) {
            // First detail line goes on the status row; continuation lines
            // are indented underneath.
            console.error(`  ${statusGlyph(result.pass)} ${pad(name)}${result.details ? ` ${result.details.split("\n")[0]}` : ""}`);
            if (result.details?.includes("\n")) {
                for (const line of result.details.split("\n").slice(1)) {
                    console.error(`      ${line}`);
                }
            }
        }
        console.error("");
        console.error(deterministicPass
            ? "\x1b[32mPASS\x1b[0m all deterministic checks succeeded"
            : "\x1b[31mFAIL\x1b[0m one or more deterministic checks failed");
    }
    // In `--report-json` mode the LLM step still runs (SPEC §7 mandatory) but
    // any failure surfaces as an llm_verdict in the JSON rather than crashing
    // the CLI — the watchlist runner needs a structured record either way.
    const auditTry = async () => {
        try {
            return {
                ok: true,
                audit: await runLlmAudit(ctx, {
                    modelId: process.env.OSS_VERIFY_MODEL_ID || "claude-sonnet-4-6",
                    apiKey: process.env.ANTHROPIC_API_KEY,
                }),
            };
        }
        catch (e) {
            return { ok: false, error: e.message };
        }
    };
    if (args.reportJson) {
        // Internal/preview mode. Runs the full conformant pipeline (deterministic
        // + LLM) and emits a JSON status report instead of a predicate. Always
        // exits 0 so callers can record fail states (the watchlist runner does).
        // NOT a substitute for the conformant attestation flow — output is not
        // a valid predicate and MUST NOT be signed and published as one.
        const a = await auditTry();
        // Audit infrastructure failure is recorded as a "block" verdict so
        // the report always carries a structured llm_verdict.
        const llm = a.ok
            ? a.audit.verdict
            : { verdict: "block", rationale: a.error, passes: 0 };
        const report = {
            mode: "report-json",
            repo_url: ctx.repoUrl,
            commit_sha: ctx.commitSha,
            default_branch: branch,
            checked_at: new Date().toISOString(),
            deterministic_pass: deterministicPass,
            llm_verdict: llm,
            overall_pass: deterministicPass && llm.verdict === "pass",
            criteria,
            evidence: {
                osi_response_hash: osiResponseHash,
                sbom_hash: sbom.sbomHash,
                sbom_format: sbom.sbomFormat,
            },
        };
        process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
        process.exit(0);
    }
    if (!deterministicPass) {
        // Per SPEC §4: CLI MUST refuse to produce a predicate if any deterministic stage fails.
        if (args.output !== "report") {
            console.error("predicate not emitted (deterministic checks failed)");
        }
        process.exit(1);
    }
    // LLM audit (SPEC §7). MAY block, MUST NOT grant.
    const auditAttempt = await auditTry();
    if (!auditAttempt.ok) {
        // Audit-infrastructure failure (missing key, network, ...) is a CLI
        // error (exit 2) — distinct from an audit "block" verdict (exit 1).
        console.error(`oss-verify error (LLM audit): ${auditAttempt.error}`);
        process.exit(2);
    }
    const audit = auditAttempt.audit;
    if (args.output !== "predicate") {
        const glyph = audit.verdict.verdict === "block" ? "\x1b[31m✗\x1b[0m" : "\x1b[32m✓\x1b[0m";
        console.error("");
        console.error(`  ${glyph} ${pad("llm_audit")} ${audit.verdict.rationale ?? ""}`);
    }
    if (audit.verdict.verdict === "block") {
        if (args.output !== "report") {
            console.error(`predicate not emitted (LLM audit blocked: ${audit.verdict.rationale})`);
        }
        process.exit(1);
    }
    const predicate = buildPredicate({
        commitSha: ctx.commitSha,
        repoUrl: ctx.repoUrl,
        defaultBranch: branch,
        criteria,
        evidence: {
            osi_response_hash: osiResponseHash,
            sbom_hash: sbom.sbomHash,
            sbom_format: sbom.sbomFormat,
            sbom_uri: sbom.sbomUri,
            exemptions: [],
            llm_verdict: audit.verdict,
        },
        modelId: audit.modelId,
        promptHash: audit.promptHash,
    });
    if (args.output === "predicate" || args.output === "both") {
        // Emit ONLY the predicate body. `cosign attest-blob --predicate` reads
        // this file as the predicate content and wraps it in its own in-toto
        // Statement using `--type` for predicateType and the input file for
        // the subject. Emitting a full Statement here would cause cosign to
        // nest our Statement inside another Statement, so verifiers see
        // `statement.predicate.predicate.criteria` and `predicateAllPass`
        // reads the wrong layer as undefined.
        process.stdout.write(`${JSON.stringify(predicate, null, 2)}\n`);
    }
    process.exit(0);
}
// Top-level error trap: anything unhandled is a CLI error (exit 2).
main().catch((err) => {
    console.error(`oss-verify error: ${err instanceof Error ? err.message : String(err)}`);
    if (err instanceof Error && err.stack)
        console.error(err.stack);
    process.exit(2);
});
|
package/dist/src/git.js
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { execSync } from "node:child_process";
|
|
2
|
+
/**
 * Run a shell command in `cwd` and return trimmed stdout. stdin is ignored;
 * stdout/stderr are captured (a non-zero exit throws, with stderr attached
 * by execSync).
 */
const exec = (cmd, cwd) => {
    const stdout = execSync(cmd, { cwd, encoding: "utf8", stdio: ["ignore", "pipe", "pipe"] });
    return stdout.trim();
};
/** Commit SHA of HEAD in `repoRoot`. Throws if not a git repository. */
export function commitSha(repoRoot) {
    return exec("git rev-parse HEAD", repoRoot);
}
|
|
6
|
+
/**
 * Best-effort default-branch name: the symbolic ref of origin/HEAD when it
 * is set, otherwise the currently checked-out branch, otherwise "main"
 * (e.g. detached HEAD yields an empty branch name).
 */
export function defaultBranch(repoRoot) {
    try {
        const ref = exec("git symbolic-ref --short refs/remotes/origin/HEAD", repoRoot);
        return ref.replace(/^origin\//, "");
    }
    catch {
        const current = exec("git branch --show-current", repoRoot);
        return current === "" ? "main" : current;
    }
}
|
|
16
|
+
/**
 * All paths tracked in the git index (repo-relative, newline-split, empty
 * lines dropped).
 * NOTE(review): the original header said "at HEAD", but `--cached` lists
 * the index; the two match only when the index is clean — confirm intent.
 */
export function lsFiles(repoRoot) {
    const listing = exec("git ls-files --cached --exclude-standard", repoRoot);
    return listing.split("\n").filter((line) => line.length > 0);
}
|
|
20
|
+
/**
 * Canonical https URL of the `origin` remote.
 *
 * Normalises the common remote spellings to a single https form:
 *   git@host:org/repo.git        -> https://host/org/repo
 *   ssh://git@host/org/repo.git  -> https://host/org/repo   (new)
 *   https://host/org/repo.git    -> https://host/org/repo
 * Trailing `.git` and `/` are dropped. Non-standard ssh ports are left in
 * the host part as-is.
 */
export function repoUrlFromRemote(repoRoot) {
    const url = exec("git config --get remote.origin.url", repoRoot);
    return url
        // ssh:// scheme (with or without the `git@` user) — previously left
        // unnormalised, producing ssh URLs in predicates.
        .replace(/^ssh:\/\/(?:git@)?([^/]+)\//, "https://$1/")
        // scp-style shorthand: git@host:path
        .replace(/^git@([^:]+):/, "https://$1/")
        .replace(/\.git$/, "")
        .replace(/\/$/, "");
}
|
package/dist/src/hash.js
ADDED