@bounded-systems/conformance-kit 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ {
2
+ "imports": {
3
+ "@bounded-systems/lone": "jsr:@bounded-systems/lone@^0.1",
4
+ "zod": "npm:zod@3",
5
+ "linkedom": "npm:linkedom@0.15.3"
6
+ }
7
+ }
@@ -0,0 +1,34 @@
1
+ // gates/semantic/gate.ts — bless each rendered page's DOM with lone (semantic HTML
2
+ // + a11y). Blocking: any error-severity finding fails CI. Run from the site root
3
+ // after the build. lone is consumed from JSR (jsr:@bounded-systems/lone), pinned by
4
+ // deno.lock — no git clone, no hand-managed sha (see deno.json import map).
5
+ //
6
+ // deno run --allow-read --allow-env --allow-net gates/semantic/gate.ts
7
+ //
8
+ // Site-agnostic injection (Deno.env, both optional):
9
+ // $SEMANTIC_DIR directory of built HTML to bless (default "dist/blog").
10
+ // $SEMANTIC_SELECTOR CSS selector for the subject node to validate per page,
11
+ // falling back to <body> (default "article").
12
+ import { parseHTML } from "linkedom";
13
+ import { validate } from "@bounded-systems/lone";
14
+
15
// Both knobs are optional; see the header for their meaning.
const DIR = Deno.env.get("SEMANTIC_DIR") ?? "dist/blog";
const SELECTOR = Deno.env.get("SEMANTIC_SELECTOR") ?? "article";

// Collect the page names first so they can be sorted: directory listing order is
// OS-dependent, and a CI log that reorders between runs is hard to diff.
// Directories are skipped explicitly — one named "foo.html" would otherwise make
// Deno.readTextFile throw and abort the whole gate.
const pageNames: string[] = [];
for await (const e of Deno.readDir(DIR)) {
  if (e.isDirectory || !e.name.endsWith(".html")) continue;
  pageNames.push(e.name);
}
pageNames.sort();

let posts = 0;
let errors = 0;
let warns = 0;
for (const name of pageNames) {
  posts++;
  const { document } = parseHTML(await Deno.readTextFile(`${DIR}/${name}`));
  // Validate the configured subject node, or the whole <body> when the page has none.
  const subject = document.querySelector(SELECTOR) ?? document.body;
  const { findings } = await validate(subject);
  const errs = findings.filter((f) => f.severity === "error");
  const pageWarns = findings.length - errs.length;
  errors += errs.length;
  warns += pageWarns;
  if (findings.length) {
    console.log(`\n${name} — ${errs.length} error(s), ${pageWarns} warn(s):`);
    for (const f of findings) console.log(` [${f.severity}] ${f.code} ${f.path} — ${f.message}`);
  } else console.log(`${name} — clean`);
}
// A run that inspected nothing is almost always a wrong $SEMANTIC_DIR; say so
// loudly instead of reporting a silent green. The exit code is left unchanged.
if (posts === 0) console.warn(`lone: no .html pages found in ${DIR} — nothing was validated`);
console.log(`\nlone: ${posts} page(s) · ${errors} error(s) · ${warns} warn(s)`);
// Blocking gate: any error-severity finding fails the run.
Deno.exit(errors > 0 ? 1 : 0);
@@ -0,0 +1,208 @@
1
+ #!/usr/bin/env node
2
+ // SEO technical gate — turns the site's discoverability contract into an ENFORCEABLE
3
+ // check over the BUILT dist/. SEO "best practices" are usually advice you hope you
4
+ // followed; this gate fails closed (exit 1) the moment the built bytes break one.
5
+ //
6
+ // node gates/seo-gate.mjs [distDir] # build gate (exit 1 on any violation)
7
+ //
8
+ // What it enforces:
9
+ // 1. canonical — every indexable page has exactly one <link rel="canonical">, and
10
+ // it is SELF-consistent: the canonical URL maps back to THIS file.
11
+ // 2. title — every indexable page has a non-empty <title>, unique across pages.
12
+ // 3. description— every indexable page has a non-empty <meta name="description">,
13
+ // unique across pages.
14
+ // 4. noindex — no indexable page carries an accidental robots `noindex` (the
15
+ // error page is the only place noindex is allowed/expected).
16
+ // 5. robots.txt — parses per RFC 9309: groups start with user-agent line(s); rules
17
+ // (allow/disallow) never precede a user-agent; Sitemap values are
18
+ // absolute URLs; the advertised sitemap resolves to a built file.
19
+ // 6. sitemap — every <loc> in sitemap.xml resolves to a built page (canonicalised),
20
+ // and every URL shares the site's single origin.
21
+ // 7. links — zero broken internal links across all pages.
22
+ //
23
+ // Pure + offline: reads dist/ only, no network. Zero-dep.
24
+ //
25
+ // Site-agnostic injection (all optional, neutral defaults):
26
+ // argv[2] / $DIST built output dir (default: "dist").
27
+ // $SEO_ERROR_PAGE the page exempt from canonical/title/desc + required to be
28
+ // noindex (default: "404.html").
29
+ // $SEO_DEPLOY_SIDECARS comma list of deploy-time paths to treat as live links
30
+ // (e.g. /rekor,/provenance.json,/resume.pdf).
31
+ import { readFile, readdir, access } from "node:fs/promises";
32
+ import { join, relative, dirname, resolve } from "node:path";
33
+
34
// Built output directory: the CLI argument wins, then $DIST, then "dist".
const dist = resolve(process.argv[2] || process.env.DIST || "dist");

/** Resolves to true when `p` is accessible, false otherwise; never rejects. */
const exists = async (p) => access(p).then(() => true, () => false);

// The one page that is exempt from the per-page contract and must be noindex.
const ERROR_PAGE = process.env.SEO_ERROR_PAGE || "404.html";

// Deploy-time sidecars: written by the deploy workflow (not the local/hermetic
// build), so a link to one is resolvable rather than dead.
const DEPLOY_SIDECARS = (process.env.SEO_DEPLOY_SIDECARS || "/rekor,/provenance.json,/site.sha256")
  .split(",")
  .flatMap((entry) => {
    const trimmed = entry.trim();
    return trimmed ? [trimmed] : [];
  });

// Running violation count; `err` prints one violation and bumps it.
let errors = 0;
function err(m) {
  console.error(` ✗ ${m}`);
  errors += 1;
}
45
+
46
/**
 * Recursively gather the paths of all `.html` files below `dir`.
 * Paths are built with `join(dir, name)`; the result is in directory-listing
 * order (callers sort it themselves).
 */
async function walk(dir) {
  const entries = await readdir(dir, { withFileTypes: true });
  const found = [];
  for (const entry of entries) {
    const full = join(dir, entry.name);
    if (entry.isDirectory()) {
      const nested = await walk(full);
      found.push(...nested);
      continue;
    }
    if (entry.name.endsWith(".html")) found.push(full);
  }
  return found;
}
55
+
56
// Normalise a served path to a canonical key (drop index.html / .html / trailing /).
//
// The same function keys both on-disk paths and URL pathnames. `new URL(...).pathname`
// is percent-encoded ("/caf%C3%A9") while a file path is not ("/café"), so escapes are
// decoded before comparing — otherwise a page with a non-ASCII or spaced name could
// never match its own canonical / sitemap entry. decodeURI (not decodeURIComponent)
// is used so an encoded "/" (%2F) is NOT turned into a path separator.
const canon = (p) => {
  let s = p.replace(/\\/g, "/");
  try {
    s = decodeURI(s);
  } catch {
    // Not a valid escape sequence (e.g. a literal "%" in a file name): keep the raw text.
  }
  if (!s.startsWith("/")) s = "/" + s;
  s = s.replace(/\/index\.html$/, "/").replace(/\.html$/, "");
  if (s.length > 1) s = s.replace(/\/$/, "");
  return s || "/";
};
64
+
65
// Resolve an internal href to "ok" / "dead" / "skip" (external/anchor/etc).
//
//   pageAbs  absolute path of the page that contains the link (base for relative hrefs)
//   href     the raw attribute value
//
// "skip" covers everything that is not a path inside this site: empty hrefs,
// fragments, ANY URI scheme (https:, mailto:, javascript:, sms:, …) and
// protocol-relative URLs ("//host/…"). Previously only a short scheme list was
// skipped, so e.g. `javascript:void(0)` was reported as a dead internal link.
async function resolveHref(pageAbs, href) {
  if (!href || href.startsWith("#")) return "skip";
  if (/^[a-z][a-z0-9+.-]*:/i.test(href) || href.startsWith("//")) return "skip";
  const raw = href.split("#")[0].split("?")[0];
  if (!raw) return "skip";
  // Links are percent-encoded, files on disk are not ("my%20page" → "my page").
  let clean = raw;
  try {
    clean = decodeURI(raw);
  } catch {
    // Malformed escape: look the raw text up as written.
  }
  if (DEPLOY_SIDECARS.some((s) => clean === s || clean.startsWith(s + "/"))) return "ok";
  // Root-relative hrefs resolve against dist, everything else against the page's directory.
  const base = clean.startsWith("/") ? join(dist, clean) : resolve(dirname(pageAbs), clean);
  for (const cand of [base, base + ".html", join(base, "index.html")]) {
    if (await exists(cand)) return "ok";
  }
  return "dead";
}
77
+
78
/** Return the first `<head…</head>` span of `html` (tags included), or "" when there is none. */
function head(html) {
  const found = html.match(/<head[\s\S]*?<\/head>/i);
  return found ? found[0] : "";
}
79
+
80
/**
 * Collect the attributes of every `<tag …>` start tag found in `html`.
 *
 * Returns one prototype-less record per tag, keyed by lower-cased attribute name.
 * Values may be double-quoted, single-quoted or bare; a valueless attribute maps
 * to "". When an attribute is repeated the first occurrence wins.
 *
 * This replaces regexes that required `name="…"` to precede `content="…"` with
 * double quotes: those silently missed `<meta content="noindex" name="robots">`,
 * letting a noindex page through the gate.
 */
function startTagAttrs(html, tag) {
  const tagRe = new RegExp(`<${tag}(?=[\\s/>])((?:"[^"]*"|'[^']*'|[^'">])*)>`, "gi");
  const attrRe = /([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;
  const tags = [];
  for (const [, inner] of html.matchAll(tagRe)) {
    const attrs = Object.create(null);
    for (const [, name, dq, sq, bare] of inner.matchAll(attrRe)) {
      const key = name.toLowerCase();
      if (!(key in attrs)) attrs[key] = dq ?? sq ?? bare ?? "";
    }
    tags.push(attrs);
  }
  return tags;
}

/**
 * Run every SEO check over the built output and exit non-zero on any violation.
 * Exit codes: 2 when the dist directory is missing, 1 when at least one violation
 * was recorded through `err`; otherwise a one-line summary is printed.
 */
async function main() {
  if (!(await exists(dist))) { console.error(`✗ seo-gate: ${dist} not found — build first.`); process.exit(2); }

  const pages = (await walk(dist)).sort();
  const servedCanon = new Set(pages.map((p) => canon("/" + relative(dist, p))));

  const isIndexable = (rel) => rel !== ERROR_PAGE;

  const titles = new Map(); // title → first page (uniqueness)
  const descriptions = new Map(); // description → first page (uniqueness)
  let origin = null; // the single canonical origin, learned from page 1

  // ---- per-page <head> contract -------------------------------------------------
  for (const pageAbs of pages) {
    const rel = relative(dist, pageAbs).replace(/\\/g, "/");
    const html = await readFile(pageAbs, "utf8");
    const h = head(html);

    // content values of every <meta name=NAME content=…>, in document order.
    const metas = startTagAttrs(h, "meta");
    const metaContents = (name) => metas
      .filter((m) => typeof m.name === "string" && m.name.toLowerCase() === name && typeof m.content === "string")
      .map((m) => m.content);

    const robotsMetas = metaContents("robots");
    const hasNoindex = robotsMetas.some((c) => /\bnoindex\b/i.test(c));
    if (isIndexable(rel) && hasNoindex) err(`${rel}: indexable page carries robots noindex`);
    if (!isIndexable(rel) && !hasNoindex) err(`${rel}: error page should be noindex (missing robots noindex)`);

    if (!isIndexable(rel)) continue;

    // rel is a space-separated token list; only links that actually carry an href count.
    const canons = startTagAttrs(h, "link")
      .filter((l) => typeof l.href === "string" && (l.rel || "").toLowerCase().split(/\s+/).includes("canonical"))
      .map((l) => l.href);
    if (canons.length !== 1) {
      err(`${rel}: ${canons.length} <link rel="canonical"> (want exactly 1)`);
    } else {
      const url = canons[0];
      let u;
      try { u = new URL(url); } catch { u = null; }
      if (!u) err(`${rel}: canonical is not an absolute URL — ${url}`);
      else {
        const thisOrigin = u.origin;
        if (origin === null) origin = thisOrigin;
        else if (thisOrigin !== origin) err(`${rel}: canonical origin ${thisOrigin} ≠ site origin ${origin}`);
        const want = canon("/" + rel);
        const got = canon(u.pathname);
        if (got !== want) err(`${rel}: canonical points at ${got} but this file serves ${want} (not self-consistent)`);
      }
    }

    // <title> may carry attributes (e.g. lang); only its text matters here.
    const titleMatch = h.match(/<title(?:\s[^>]*)?>([\s\S]*?)<\/title>/i);
    const title = titleMatch ? titleMatch[1].trim() : "";
    if (!title) err(`${rel}: empty or missing <title>`);
    else if (titles.has(title)) err(`${rel}: duplicate <title> (also in ${titles.get(title)}): "${title}"`);
    else titles.set(title, rel);

    // When several description metas exist, the last one is the one checked.
    const descs = metaContents("description");
    const desc = descs.length ? descs[descs.length - 1] : null;
    if (desc == null || !desc.trim()) err(`${rel}: empty or missing <meta name="description">`);
    else if (descriptions.has(desc.trim())) err(`${rel}: duplicate meta description (also in ${descriptions.get(desc.trim())})`);
    else descriptions.set(desc.trim(), rel);
  }

  // ---- robots.txt — RFC 9309 ------------------------------------------------------
  const robotsPath = join(dist, "robots.txt");
  if (!(await exists(robotsPath))) {
    err("robots.txt: missing from dist/");
  } else {
    const lines = (await readFile(robotsPath, "utf8")).split(/\r?\n/);
    let seenUserAgent = false;
    let groupOpen = false;
    const sitemaps = [];
    lines.forEach((raw, i) => {
      const line = raw.replace(/#.*$/, "").trim(); // strip comments
      if (!line) return;
      const m = /^([A-Za-z-]+)\s*:\s*(.*)$/.exec(line);
      if (!m) { err(`robots.txt:${i + 1}: not a "field: value" record — ${raw.trim()}`); return; }
      const field = m[1].toLowerCase();
      const value = m[2].trim();
      if (field === "user-agent") { seenUserAgent = true; groupOpen = true; }
      else if (field === "allow" || field === "disallow") {
        if (!groupOpen) err(`robots.txt:${i + 1}: ${field} rule before any user-agent (RFC 9309 groups start with user-agent)`);
        if (value && !value.startsWith("/") && !value.startsWith("*")) err(`robots.txt:${i + 1}: ${field} path should start with "/" — ${value}`);
      }
      else if (field === "sitemap") {
        try { new URL(value); sitemaps.push(value); } catch { err(`robots.txt:${i + 1}: Sitemap is not an absolute URL — ${value}`); }
      }
      else if (field === "crawl-delay" || field === "host") { /* tolerated non-standard extensions */ }
      else { /* RFC 9309 §2.2.4: unrecognised fields are ignored, not an error */ }
    });
    if (!seenUserAgent) err("robots.txt: no user-agent group (RFC 9309 requires at least one group)");
    for (const sm of sitemaps) {
      const u = new URL(sm);
      // Only a sitemap on the site's own origin can be checked against dist/.
      if (origin && u.origin === origin) {
        const f = canon(u.pathname);
        const built = pages.some((p) => canon("/" + relative(dist, p)) === f) || (await exists(join(dist, u.pathname.replace(/^\//, ""))));
        if (!built) err(`robots.txt: advertised Sitemap ${sm} does not resolve to a built file`);
      }
    }
  }

  // ---- sitemap.xml — every <loc> resolves to a built page -------------------------
  const sitemapPath = join(dist, "sitemap.xml");
  if (!(await exists(sitemapPath))) {
    err("sitemap.xml: missing from dist/");
  } else {
    const xml = await readFile(sitemapPath, "utf8");
    const locs = [...xml.matchAll(/<loc>\s*([^<\s]+)\s*<\/loc>/gi)].map((m) => m[1]);
    if (locs.length === 0) err("sitemap.xml: no <loc> entries");
    for (const loc of locs) {
      let u;
      try { u = new URL(loc); } catch { err(`sitemap.xml: <loc> is not an absolute URL — ${loc}`); continue; }
      if (origin && u.origin !== origin) err(`sitemap.xml: <loc> origin ${u.origin} ≠ site origin ${origin} — ${loc}`);
      const key = canon(u.pathname);
      if (!servedCanon.has(key)) err(`sitemap.xml: <loc> ${loc} does not resolve to a built page (${key})`);
    }
  }

  // ---- internal link graph — zero broken links ------------------------------------
  for (const pageAbs of pages) {
    const rel = relative(dist, pageAbs).replace(/\\/g, "/");
    const html = await readFile(pageAbs, "utf8");
    // Parsing attributes (instead of matching `href="` anywhere inside the tag)
    // keeps `data-href` and friends from being mistaken for the link target.
    for (const a of startTagAttrs(html, "a")) {
      if (typeof a.href !== "string") continue;
      const href = a.href;
      if ((await resolveHref(pageAbs, href)) === "dead") err(`${rel}: dead internal link → ${href}`);
    }
  }

  console.log("");
  if (errors) {
    console.error(`✗ seo-gate: ${errors} violation(s) across ${pages.length} built page(s).`);
    process.exit(1);
  }
  console.log(`✓ seo-gate: ${pages.length} page(s) — canonical/title/description/robots/sitemap/links all consistent (origin ${origin}).`);
}
207
+
208
+ main().catch((e) => { console.error("✗ seo-gate: error —", e.message); process.exit(1); });
@@ -0,0 +1,160 @@
1
+ #!/usr/bin/env node
2
+ // SHACL runner — turns a site's emitted JSON-LD into an ENFORCEABLE contract.
3
+ //
4
+ // node gates/shacl-runner.mjs <shapes.ttl> <htmlDir>
5
+ //
6
+ // Schema.org alone is flexible guidance. Schema.org + SHACL is an enforceable
7
+ // contract: this runner extracts every JSON-LD block from the BUILT HTML under
8
+ // <htmlDir>, expands it to RDF, and validates it against the SHACL <shapes.ttl>. It
9
+ // FAILS (exit 1) unless the SHACL report says conforms: true — printing every
10
+ // violation.
11
+ //
12
+ // The shapes file is an INPUT and stays in the consuming site (each site's
13
+ // structured data differs); only the runner is shared. What it does NOT check
14
+ // (separate / manual): that the structured data matches the VISIBLE page content,
15
+ // and search-engine rich-result eligibility. SHACL is the enforceable STRUCTURAL
16
+ // contract.
17
+ //
18
+ // Site-agnostic injection:
19
+ // argv[2] path to the SHACL shapes Turtle file (required).
20
+ // argv[3] directory of built HTML to scan recursively (required).
21
+ // $SHACL_CONTEXT optional path to a JSON-LD context document to use instead of
22
+ // the built-in offline schema.org context (for non-schema.org
23
+ // vocabularies). The gate NEVER fetches a context over the network.
24
+ import { readFile, readdir } from "node:fs/promises";
25
+ import { existsSync } from "node:fs";
26
+ import { join, resolve } from "node:path";
27
+
28
+ import jsonld from "jsonld";
29
+ import { Parser as N3Parser } from "n3";
30
+ import rdf from "@zazuko/env-node"; // RDF/JS env with .dataset() + clownface (required by rdf-validate-shacl)
31
+ import SHACLValidator from "rdf-validate-shacl";
32
+
33
// Positional arguments: <shapes.ttl> first, then <htmlDir>. Both are mandatory;
// a missing one prints the usage line and exits with status 2.
const [, , shapesPath, htmlDir] = process.argv;
if (!(shapesPath && htmlDir)) {
  console.error("usage: shacl-runner <shapes.ttl> <htmlDir>");
  process.exit(2);
}
// Absolute forms used for all file access below.
const SHAPES = resolve(shapesPath);
const DIST = resolve(htmlDir);
41
+
42
+ // --- offline JSON-LD context ----------------------------------------------------
43
+ // Sites commonly emit `"@context": "https://schema.org"`. Expanding that normally
44
+ // dereferences the remote context over the network — non-deterministic and
45
+ // unavailable in hermetic CI. We serve a tiny local context instead: @vocab maps
46
+ // every type/property name to a stable https://schema.org/ IRI; a few URL-valued
47
+ // properties coerce to IRIs. A consumer with a different vocabulary points
48
+ // $SHACL_CONTEXT at its own context document.
49
// Built-in stand-in for the remote schema.org context: bare names resolve under
// https://schema.org/ and three URL-valued properties are coerced to IRIs.
const DEFAULT_SCHEMA_CONTEXT = {
  "@context": {
    "@vocab": "https://schema.org/",
    url: { "@type": "@id" },
    sameAs: { "@type": "@id" },
    mainEntityOfPage: { "@type": "@id" },
  },
};

// Every spelling of the schema.org context IRI that is answered locally.
const SCHEMA_IRIS = new Set(
  ["https", "http"].flatMap((scheme) => [`${scheme}://schema.org`, `${scheme}://schema.org/`]),
);

// $SHACL_CONTEXT, when set, names a JSON file that replaces the built-in context.
const localContext = process.env.SHACL_CONTEXT
  ? JSON.parse(await readFile(resolve(process.env.SHACL_CONTEXT), "utf8"))
  : DEFAULT_SCHEMA_CONTEXT;

/**
 * jsonld document loader that never touches the network.
 * A schema.org IRI — or any IRI at all when $SHACL_CONTEXT is set — is answered
 * with `localContext`; every other request is rejected.
 */
async function documentLoader(urlArg) {
  const answeredLocally = Boolean(process.env.SHACL_CONTEXT) || SCHEMA_IRIS.has(urlArg);
  if (!answeredLocally) {
    throw new Error(`shacl-runner: refusing network fetch for context <${urlArg}> (offline gate)`);
  }
  return { contextUrl: null, documentUrl: urlArg, document: localContext };
}
70
+
71
+ // --- extract JSON-LD blocks from built HTML -------------------------------------
72
// Matches one <script type="application/ld+json"> element; group 1 is its text.
const LD_RE = /<script[^>]*type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi;

/**
 * Return the trimmed text of every non-empty JSON-LD <script> block in `html`,
 * in document order.
 *
 * The text is returned exactly as embedded. Builders commonly write "<" as the
 * JSON escape \u003c so the payload cannot close the <script>; that is already
 * valid JSON which JSON.parse decodes, so no manual un-escaping is done here.
 * (Un-escaping by text replacement turned an escaped backslash followed by
 * "u003c" into the invalid escape "\<" and made such blocks unparseable.)
 */
function extractJsonLd(html) {
  const out = [];
  // matchAll works on a private copy of the regex, so no lastIndex state is
  // shared between calls.
  for (const [, body] of html.matchAll(LD_RE)) {
    const raw = body.trim();
    if (raw) out.push(raw);
  }
  return out;
}
83
+
84
/**
 * Recursively list the `.html` files below `dir`, returned in sorted order.
 */
async function listHtmlFiles(dir) {
  const entries = await readdir(dir, { withFileTypes: true });
  const collected = [];
  for (const entry of entries) {
    const full = join(dir, entry.name);
    if (entry.isDirectory()) {
      for (const nested of await listHtmlFiles(full)) collected.push(nested);
    } else if (entry.name.endsWith(".html")) {
      collected.push(full);
    }
  }
  collected.sort();
  return collected;
}
93
+
94
+ // --- jsonld → rdf-ext dataset ---------------------------------------------------
95
/**
 * Convert one parsed JSON-LD document into a dataset: jsonld serialises it to
 * N-Quads (contexts are resolved through the offline `documentLoader`), N3
 * parses the N-Quads, and the quads are wrapped with `rdf.dataset`.
 */
async function jsonLdToDataset(doc) {
  const toRdfOptions = { format: "application/n-quads", documentLoader };
  const nquads = await jsonld.toRDF(doc, toRdfOptions);
  const parser = new N3Parser({ format: "application/n-quads" });
  return rdf.dataset(parser.parse(nquads));
}
100
/** Parse Turtle text with N3 and wrap the resulting quads with `rdf.dataset`. */
async function turtleToDataset(ttl) {
  const parser = new N3Parser({ format: "text/turtle" });
  return rdf.dataset(parser.parse(ttl));
}
104
+
105
/**
 * Validate the JSON-LD of every built page against the SHACL shapes.
 *
 * All JSON-LD blocks of one page are merged into a single dataset and validated
 * together. Exit codes: 2 when an input path is missing, 1 when any page fails
 * (non-conforming data, or a block that cannot be read as JSON-LD).
 */
async function main() {
  if (!existsSync(SHAPES)) { console.error(`✗ shacl-runner: shapes file not found — ${SHAPES}`); process.exit(2); }
  if (!existsSync(DIST)) { console.error(`✗ shacl-runner: html dir not found — ${DIST}`); process.exit(2); }

  const shapesTtl = await readFile(SHAPES, "utf8");
  const shapes = await turtleToDataset(shapesTtl);
  const validator = new SHACLValidator(shapes, { factory: rdf });

  const files = await listHtmlFiles(DIST);
  let totalBlocks = 0;
  let failed = false;

  for (const file of files) {
    const rel = file.slice(DIST.length + 1); // path relative to the scanned dir
    const blocks = extractJsonLd(await readFile(file, "utf8"));
    if (blocks.length === 0) {
      console.log(` ${rel}: no JSON-LD (ok)`);
      continue;
    }
    totalBlocks += blocks.length;

    const data = rdf.dataset();
    let unreadable = false;
    for (const [i, block] of blocks.entries()) {
      // Report a broken block against its page and keep going, so one bad page
      // does not hide the results for all the others.
      try {
        const ds = await jsonLdToDataset(JSON.parse(block));
        for (const q of ds) data.add(q);
      } catch (e) {
        unreadable = true;
        console.log(` ${rel}: block ${i + 1} could not be read as JSON-LD — ${e.message}`);
      }
    }
    if (unreadable) {
      failed = true;
      continue;
    }

    // Awaited on purpose: newer rdf-validate-shacl releases return a Promise
    // here, and awaiting a plain report object (older releases) is a no-op.
    const report = await validator.validate(data);
    if (report.conforms) {
      console.log(` ${rel}: ${blocks.length} block(s) — conforms: true`);
    } else {
      failed = true;
      console.log(` ${rel}: ${blocks.length} block(s) — conforms: FALSE`);
      for (const r of report.results) {
        const path = r.path?.value ?? "(node)";
        const focus = r.focusNode?.value ?? "(?)";
        const shape = r.sourceShape?.value ?? "";
        const msg = r.message?.map((m) => m.value).join("; ") || r.sourceConstraintComponent?.value || "violation";
        console.log(` ✗ ${focus} [${path}] ${msg} <${shape}>`);
      }
    }
  }

  console.log("");
  if (failed) {
    console.error(`✗ shacl-runner: JSON-LD does NOT conform to ${shapesPath}`);
    process.exit(1);
  }
  console.log(`✓ shacl-runner: conforms: true — ${totalBlocks} JSON-LD block(s) across ${files.length} page(s) satisfy the SHACL contract`);
}
156
+
157
+ main().catch((err) => {
158
+ console.error("✗ shacl-runner: error —", err.message);
159
+ process.exit(1);
160
+ });
@@ -0,0 +1,101 @@
1
+ #!/usr/bin/env node
2
+ // known-vulnerability gate — turns "npm audit looked fine once" into a
3
+ // CONTINUOUSLY-ENFORCED member of the conformance contract. It runs `npm audit`
4
+ // over a project's lockfile and FAILS CLOSED (exit 1) when the count of known
5
+ // critical/high advisories exceeds a configurable threshold (default 0). The
6
+ // machine-readable result is exactly the shape lone's conformance() model consumes
7
+ // for `security.no-critical-vulns` (`{ knownCriticalOrHighVulns }`), so a clean run
8
+ // is what lets a site honestly assert that criterion — and a new advisory turns CI red.
9
+ //
10
+ // node gates/vuln-gate.mjs [projectDir] # build gate (exit 1 when over threshold)
11
+ //
12
+ // Everything is config-driven; NOTHING about any one project is hard-coded:
13
+ // argv[2] / $VULN_ROOT project dir containing the lockfile (default: ".")
14
+ // $VULN_OMIT_DEV "true" → audit production deps only (default: "true")
15
+ // A static site SHIPS no runtime deps, so production scope
16
+ // == the deployed bytes; the build toolchain's own
17
+ // advisories are a separate concern. Set "false" to audit all.
18
+ // $VULN_THRESHOLD highest tolerated known critical/high (default: 0)
19
+ // $VULN_REPORT path to write the JSON report (default: none)
20
+ //
21
+ // The pure parse/evaluation functions are exported for unit testing without a network.
22
import { spawnSync } from "node:child_process";
import { realpathSync } from "node:fs";
import { writeFile, access } from "node:fs/promises";
import { resolve } from "node:path";
import { pathToFileURL } from "node:url";
25
+
26
+ // ── Pure core (network-free; unit-testable) ──────────────────────────────────
27
+
28
/**
 * Pull the critical and high advisory counts out of an `npm audit --json` payload.
 * Reads `metadata.vulnerabilities`; a missing payload, a missing field or a
 * falsy count all read as 0.
 * @returns {{critical: number, high: number, known: number}} `known` = critical + high
 */
export function parseAudit(json) {
  const counts = json?.metadata?.vulnerabilities || {};
  const { critical: rawCritical, high: rawHigh } = counts;
  const critical = rawCritical || 0;
  const high = rawHigh || 0;
  return { critical, high, known: critical + high };
}
36
+
37
/**
 * Judge parsed audit counts against a threshold.
 * Passes when `known` does not exceed `threshold`; the counts are echoed back
 * unchanged alongside the verdict.
 */
export function evaluateVulns({ critical, high, known }, threshold = 0) {
  const withinBudget = known <= threshold;
  // The envelope lone's conformance() consumes for `security.no-critical-vulns`.
  const envelope = { knownCriticalOrHighVulns: known };
  return {
    passed: withinBudget,
    threshold,
    critical,
    high,
    knownCriticalOrHighVulns: known,
    vulns: envelope,
  };
}
49
+
50
+ // ── Impure runner ────────────────────────────────────────────────────────────
51
+
52
/**
 * Run `npm audit --json` and return the parsed payload. npm exits non-zero when
 * advisories exist, so stdout is read regardless of the exit code.
 *
 * The payload is only returned when it is a real audit report. An npm error
 * payload (`{ "error": { code, summary } }`) or any JSON without
 * `metadata.vulnerabilities` used to be returned as-is, which downstream parses
 * as "0 advisories" — i.e. a failed audit made the gate PASS. Those now throw.
 *
 * @param {object} [options]
 * @param {string} [options.root="."]   project directory (used as the npm cwd)
 * @param {boolean} [options.omitDev=true] pass `--omit=dev`
 * @param {Function} [options.spawn=spawnSync] spawnSync-compatible runner; injectable
 *        so the function can be exercised without npm or a network
 * @returns {object} the parsed audit report
 * @throws {Error} when npm cannot be run, prints nothing, prints non-JSON, reports
 *         an error, or prints JSON that carries no vulnerability counts
 */
export function runNpmAudit({ root = ".", omitDev = true, spawn = spawnSync } = {}) {
  const args = ["audit", "--json"];
  if (omitDev) args.push("--omit=dev");
  const res = spawn("npm", args, { cwd: resolve(root), encoding: "utf8", maxBuffer: 64 * 1024 * 1024 });
  if (res.error) throw new Error(`cannot run npm audit (${res.error.message})`);
  if (!res.stdout) throw new Error(`npm audit produced no JSON (stderr: ${(res.stderr || "").slice(0, 300)})`);

  let payload;
  try {
    payload = JSON.parse(res.stdout);
  } catch (cause) {
    throw new Error(`npm audit printed unparseable JSON (${cause.message})`, { cause });
  }

  if (payload?.error) {
    const { code = "unknown", summary = "" } = payload.error;
    throw new Error(`npm audit failed (${code}${summary ? `: ${summary}` : ""})`);
  }
  const counts = payload?.metadata?.vulnerabilities;
  if (counts === null || typeof counts !== "object") {
    throw new Error("npm audit JSON has no metadata.vulnerabilities — cannot establish a vulnerability count");
  }
  return payload;
}
61
+
62
/**
 * Full pipeline in one call: run the audit, extract the counts, judge them
 * against `threshold`, and return the resulting report.
 */
export function runVulnGate({ root = ".", omitDev = true, threshold = 0 } = {}) {
  const payload = runNpmAudit({ root, omitDev });
  const counts = parseAudit(payload);
  return evaluateVulns(counts, threshold);
}
66
+
67
+ // ── CLI ──────────────────────────────────────────────────────────────────────
68
+
69
/**
 * CLI entry point: read the configuration from argv / environment, run the
 * gate, optionally write the JSON report, and print a one-line verdict.
 * Exit codes: 2 for a configuration problem (no lockfile, bad threshold),
 * 1 when the advisory count exceeds the threshold.
 */
async function main() {
  // argv[2] is the project dir unless it looks like a flag; then $VULN_ROOT, then ".".
  const cliRoot = process.argv[2] && !process.argv[2].startsWith("--") ? process.argv[2] : undefined;
  const root = resolve(cliRoot || process.env.VULN_ROOT || ".");
  const exists = async (p) => { try { await access(p); return true; } catch { return false; } };
  if (!(await exists(resolve(root, "package-lock.json"))) && !(await exists(resolve(root, "npm-shrinkwrap.json")))) {
    console.error(`✗ vuln-gate: no package-lock.json or npm-shrinkwrap.json under ${root} — nothing to audit.`);
    process.exit(2);
  }
  const omitDev = (process.env.VULN_OMIT_DEV ?? "true").trim() !== "false";

  // Accept digits only. Number.parseInt would quietly read "3abc" as 3 and "1.9"
  // as 1, letting a typo loosen the gate without anyone noticing.
  const rawThreshold = (process.env.VULN_THRESHOLD ?? "0").trim();
  const threshold = /^\+?\d+$/.test(rawThreshold) ? Number(rawThreshold) : Number.NaN;
  if (!Number.isSafeInteger(threshold) || threshold < 0) {
    console.error(`✗ vuln-gate: $VULN_THRESHOLD must be an integer ≥ 0 (got "${process.env.VULN_THRESHOLD}")`);
    process.exit(2);
  }

  const report = runVulnGate({ root, omitDev, threshold });
  if (process.env.VULN_REPORT) {
    await writeFile(resolve(process.env.VULN_REPORT), JSON.stringify(report, null, 2) + "\n");
  }

  const scope = omitDev ? "production deps" : "all deps";
  const line = `vuln-gate: ${report.knownCriticalOrHighVulns} known critical/high in ${scope} (${report.critical} critical, ${report.high} high) · threshold ${threshold}`;
  if (!report.passed) {
    console.error(`✗ ${line}`);
    console.error(` a known critical/high advisory exceeds the threshold — fix it, or (if accepted) raise $VULN_THRESHOLD.`);
    process.exit(1);
  }
  console.log(`✓ ${line}`);
}
97
+
98
// Only run the CLI when invoked directly (not when imported by a test).
//
// `import.meta.url` is a percent-encoded file URL of the module's REAL path, so
// comparing it with `file://` + argv[1] fails whenever the script is reached
// through a symlink (e.g. node_modules/.bin), on Windows (file:///C:/…), or when
// the path holds characters that URLs escape. In each of those cases the gate
// used to exit 0 without auditing anything. Both the real path and the literal
// path are accepted (the latter covers --preserve-symlinks-main).
function isMainModule() {
  const entry = process.argv[1];
  if (!entry) return false;
  if (import.meta.url === pathToFileURL(entry).href) return true;
  try {
    return import.meta.url === pathToFileURL(realpathSync(entry)).href;
  } catch {
    // argv[1] does not name an existing file: this module is not the entry point.
    return false;
  }
}

if (isMainModule()) {
  main().catch((e) => { console.error("✗ vuln-gate: error —", e.stack || e.message); process.exit(1); });
}
@@ -0,0 +1,144 @@
1
+ #!/usr/bin/env node
2
+ // gen-cid — content-address the served site as an IPFS UnixFS directory CID, with
3
+ // NO daemon and NO new dependency, and record it in the build provenance alongside
4
+ // the existing digests (the site.sha256 manifest hash).
5
+ //
6
+ // node generators/gen-cid.mjs # uses ./dist
7
+ // DIST=out node generators/gen-cid.mjs
8
+ //
9
+ // Runs LAST, after every served byte exists. It builds the exact UnixFS dag-pb DAG
10
+ // `ipfs add -r` would (classic layout: sha2-256, 256 KiB fixed chunker, no raw
11
+ // leaves), so the reported CIDv1 re-derives from the served bytes by any IPFS
12
+ // implementation. The file set is exactly the signed whole-site manifest
13
+ // ($DIST/site.sha256) when present, so the CID and the manifest cover identical
14
+ // content; otherwise it walks $DIST with the same sidecar exclusions. No pinning,
15
+ // no DNSLink — just a portable address.
16
+ //
17
+ // Recorded into $DIST/provenance.json (merged if gen-provenance already wrote it;
18
+ // created minimally for a local build). provenance.json is excluded from the
19
+ // manifest + the CID set, so there is no circularity. Site-agnostic: the only knob
20
+ // is $DIST.
21
+ import { readFile, writeFile, readdir, access } from "node:fs/promises";
22
+ import { createHash } from "node:crypto";
23
+ import { join, relative, resolve } from "node:path";
24
+
25
// Output directory: $DIST (absolute, or relative to the current directory), else ./dist.
const dist = resolve(process.cwd(), process.env.DIST || "dist");

/** Resolves to true when `p` is accessible, false otherwise; never rejects. */
const exists = async (p) => access(p).then(() => true, () => false);

const CHUNK = 256 * 1024; // kubo default fixed chunker
const MAX_LINKS = 174; // kubo default balanced-layout fanout

// Same sidecars gen-sitemanifest.mjs excludes from site.sha256 (they describe the
// site, they are not the site) — plus any Sigstore bundle, which is written after.
const EXCLUDE = new Set([
  "site.sha256",
  "provenance.json",
  "rekor/index.html",
  "attestation.intoto.json",
  "_headers",
  "_redirects",
  "_routes.json",
]);

/** True when the dist-relative path `rel` must stay out of the content-addressed set. */
function isExcluded(rel) {
  if (EXCLUDE.has(rel)) return true;
  return rel.startsWith("rekor/") || rel.endsWith(".sigstore.json");
}
39
+
40
// ---- protobuf + dag-pb + UnixFS (single-block + balanced multi-block) --------

/** Protobuf base-128 varint of a non-negative integer (number or bigint). */
const varint = (n) => {
  const out = [];
  let v = BigInt(n);
  do {
    let byte = Number(v & 0x7fn);
    v >>= 7n;
    if (v) byte |= 0x80; // continuation bit: more groups follow
    out.push(byte);
  } while (v);
  return Buffer.from(out);
};
/** Length-delimited field: key byte(s), byte length, payload. `tag` is the full key ((field << 3) | 2). */
const lenDelim = (tag, buf) => Buffer.concat([varint(tag), varint(buf.length), buf]);
/** Varint field: key byte(s) then the value. `tag` is the full key ((field << 3) | 0). */
const vfield = (tag, n) => Buffer.concat([varint(tag), varint(n)]);
const sha256 = (buf) => createHash("sha256").update(buf).digest();
const multihash = (buf) => Buffer.concat([Buffer.from([0x12, 0x20]), sha256(buf)]); // 0x12 sha2-256, 0x20 len

const B32 = "abcdefghijklmnopqrstuvwxyz234567";
/** Lower-case RFC 4648 base32 without padding. */
const base32 = (bytes) => {
  let bits = 0;
  let value = 0;
  let out = "";
  for (const b of bytes) {
    // `value` wraps at 32 bits, which is harmless: at most 12 low bits are ever pending.
    value = (value << 8) | b;
    bits += 8;
    while (bits >= 5) {
      out += B32[(value >>> (bits - 5)) & 31];
      bits -= 5;
    }
  }
  if (bits > 0) out += B32[(value << (5 - bits)) & 31];
  return out;
};
const B58 = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
/** base58btc: each leading zero byte becomes a leading "1". */
const base58 = (bytes) => {
  let zeros = 0;
  while (zeros < bytes.length && bytes[zeros] === 0) zeros++;
  let n = 0n;
  for (const b of bytes) n = n * 256n + BigInt(b);
  let out = "";
  while (n > 0n) {
    out = B58[Number(n % 58n)] + out;
    n /= 58n;
  }
  return "1".repeat(zeros) + out;
};
const cidV1Base32 = (mh) => "b" + base32(Buffer.concat([Buffer.from([0x01, 0x70]), mh])); // 0x01 v1, 0x70 dag-pb
const cidV0Base58 = (mh) => base58(mh); // v0 == bare dag-pb multihash, base58btc

// UnixFS Data: Type(1) File=2 / Directory=1, Data(2), filesize(3), blocksizes(4 repeated)
//
// A zero-length file carries NO Data field — only Type and filesize=0 — giving the
// leaf block 0a0408021800. Writing an empty Data field (12 00) instead produces a
// different block, so one empty file in the tree would change the directory CID.
const ufLeaf = (data) => Buffer.concat([
  vfield(0x08, 2),
  ...(data.length > 0 ? [lenDelim(0x12, data)] : []),
  vfield(0x18, data.length),
]);
const ufFileRoot = (filesize, blocksizes) => Buffer.concat([
  vfield(0x08, 2),
  vfield(0x18, filesize),
  ...blocksizes.map((b) => vfield(0x20, b)),
]);
const ufDir = () => vfield(0x08, 1);
// dag-pb PBNode: Links(2) entries first, then Data(1). PBLink: Hash(1), Name(2), Tsize(3)
const pbLink = (mh, name, tsize) => Buffer.concat([
  lenDelim(0x0a, mh),
  lenDelim(0x12, Buffer.from(name, "utf8")),
  vfield(0x18, tsize),
]);
const pbNode = (linkBufs, data) => Buffer.concat([
  ...linkBufs.map((l) => lenDelim(0x12, l)),
  ...(data ? [lenDelim(0x0a, data)] : []),
]);
61
+
62
/**
 * Encode one file's bytes as a UnixFS file DAG and return its root.
 * → { mh, size }   size = cumulative (this block's bytes + all descendants)
 *
 * Content of at most CHUNK bytes is a single leaf block. Larger content is cut
 * into CHUNK-sized leaves, then grouped MAX_LINKS at a time into parent nodes,
 * layer by layer, until one root remains.
 */
function encodeFile(content) {
  // One leaf block for a slice of the file; `bs` is the payload length it covers.
  const leafOf = (bytes) => {
    const block = pbNode([], ufLeaf(bytes));
    return { mh: multihash(block), size: block.length, bs: bytes.length };
  };

  if (content.length <= CHUNK) {
    const { mh, size } = leafOf(content);
    return { mh, size };
  }

  let layer = [];
  for (let offset = 0; offset < content.length; offset += CHUNK) {
    layer.push(leafOf(content.subarray(offset, offset + CHUNK)));
  }

  while (layer.length > 1) {
    const parents = [];
    for (let start = 0; start < layer.length; start += MAX_LINKS) {
      const children = layer.slice(start, start + MAX_LINKS);
      const blocksizes = children.map((child) => child.bs);
      let filesize = 0;
      for (const bs of blocksizes) filesize += bs;
      let childBytes = 0;
      for (const child of children) childBytes += child.size;
      const links = children.map((child) => pbLink(child.mh, "", child.size));
      const block = pbNode(links, ufFileRoot(filesize, blocksizes));
      parents.push({ mh: multihash(block), size: block.length + childBytes, bs: filesize });
    }
    layer = parents;
  }

  const [root] = layer;
  return { mh: root.mh, size: root.size };
}
80
+
81
// In-memory directory tree. Each node is { dirs: Map<name,node>, files: Map<name,absPath> }.
const newDir = () => ({ dirs: new Map(), files: new Map() });

/**
 * Register the file at `abs` in the tree under the path segments `relParts`,
 * creating intermediate directory nodes as needed. The last segment is the file name.
 */
function insert(treeRoot, relParts, abs) {
  const parentSegments = relParts.slice(0, -1);
  const fileName = relParts[relParts.length - 1];
  let cursor = treeRoot;
  for (const segment of parentSegments) {
    let child = cursor.dirs.get(segment);
    if (child === undefined) {
      child = newDir();
      cursor.dirs.set(segment, child);
    }
    cursor = child;
  }
  cursor.files.set(fileName, abs);
}
88
/**
 * Encode a directory tree node (and everything below it) as a UnixFS directory.
 * Files are read from disk and encoded with `encodeFile`; sub-directories recurse.
 * → { mh, size }   size = this block's bytes + the cumulative size of every entry
 */
async function encodeDir(node) {
  const entries = [];
  for (const [name, abs] of node.files) {
    const { mh, size } = encodeFile(await readFile(abs));
    entries.push({ name, mh, size });
  }
  for (const [name, child] of node.dirs) {
    const { mh, size } = await encodeDir(child);
    entries.push({ name, mh, size });
  }
  // go-ipfs sorts dir links by raw name
  entries.sort((a, b) => Buffer.compare(Buffer.from(a.name), Buffer.from(b.name)));

  const block = pbNode(entries.map((entry) => pbLink(entry.mh, entry.name, entry.size)), ufDir());
  let total = block.length;
  for (const entry of entries) total += entry.size;
  return { mh: multihash(block), size: total };
}
96
+
97
+ // ---- collect the served file set ---------------------------------------------
98
/**
 * Read the file set from $DIST/site.sha256.
 *
 * Each non-blank line is taken as `<digest><2 separator chars><path>`: the path
 * starts two characters after the first space. Line endings may be LF or CRLF —
 * splitting on "\n" alone left a trailing "\r" on every path of a CRLF manifest.
 *
 * @returns {Promise<string[]>} dist-relative paths, in manifest order
 * @throws {Error} on a line with no separator or no path, instead of passing a
 *         bogus path on to the encoder
 */
async function fromManifest() {
  const text = await readFile(join(dist, "site.sha256"), "utf8");
  const rels = [];
  for (const line of text.split(/\r?\n/)) {
    if (!line.trim()) continue;
    const gap = line.indexOf(" ");
    const rel = gap === -1 ? "" : line.slice(gap + 2);
    if (!rel) throw new Error(`site.sha256: malformed line (want "<digest>  <path>") — ${line}`);
    rels.push(rel);
  }
  return rels;
}
102
/**
 * Build the file set by walking $DIST, skipping every path `isExcluded` rejects.
 *
 * Relative paths are assembled from the entry names with "/" as the separator,
 * never taken from `path.relative`: on Windows that returns backslashes, which
 * neither `isExcluded` ("rekor/…") nor the later `rel.split("/")` understand.
 *
 * @returns {Promise<string[]>} dist-relative, "/"-separated paths in walk order
 */
async function fromWalk() {
  const out = [];
  const walk = async (dir, prefix) => {
    for (const ent of await readdir(dir, { withFileTypes: true })) {
      const abs = join(dir, ent.name);
      const rel = prefix ? `${prefix}/${ent.name}` : ent.name;
      if (ent.isDirectory()) { await walk(abs, rel); continue; }
      if (isExcluded(rel)) continue;
      out.push(rel);
    }
  };
  await walk(dist, "");
  return out;
}
115
+
116
// Prefer the signed manifest's file set so the CID and the manifest cover the
// same content; fall back to walking dist. Exclusions are applied either way.
const usedManifest = await exists(join(dist, "site.sha256"));
const rels = (usedManifest ? await fromManifest() : await fromWalk()).filter((r) => !isExcluded(r)).sort();
const tree = newDir();
for (const rel of rels) insert(tree, rel.split("/"), join(dist, rel));
const r = await encodeDir(tree);
const cid = cidV1Base32(r.mh);
const cidV0 = cidV0Base58(r.mh);

// ---- record into the build provenance ----------------------------------------
// Merge into an existing provenance.json when there is one; otherwise start a
// minimal document. Other keys under contentAddress are preserved.
const provPath = join(dist, "provenance.json");
const provenance = (await exists(provPath)) ? JSON.parse(await readFile(provPath, "utf8")) : { scope: "entire-site" };
provenance.contentAddress = {
  ...(provenance.contentAddress || {}),
  ipfs: {
    cid,
    cidV0,
    codec: "dag-pb",
    multihash: "sha2-256",
    scope: usedManifest ? "served-site (site.sha256 file set)" : "served-site (dist minus provenance sidecars)",
    fileCount: rels.length,
    pinned: false,
    dnslink: false,
    derivation: "generators/gen-cid.mjs — zero-dep UnixFS v1 (dag-pb, sha2-256, 256 KiB fixed chunker, no raw leaves)",
    // `--cid-version=1` alone turns raw leaves ON in kubo, which yields a different
    // CID than the one computed here; the re-derivation command must switch them off.
    note: "IPFS UnixFS directory CID over the served site, computed with no daemon. Re-derives from the served bytes: `ipfs add -rQ --cid-version=1 --raw-leaves=false` over the same file set yields this CID (or `ipfs add -rQ` then `ipfs cid base32` from the v0 form).",
  },
};
await writeFile(provPath, JSON.stringify(provenance, null, 2) + "\n");

console.log(`✓ ipfs CID: ${cid} (${rels.length} files, ${usedManifest ? "from site.sha256" : "from dist walk"}) → provenance.json contentAddress.ipfs`);