@bounded-systems/conformance-kit 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,120 @@
1
+ #!/usr/bin/env node
2
+ // gen-identity — emit a did:web identity + a résumé (or any JSON subject) as a W3C
3
+ // Verifiable Credential 2.0.
4
+ //
5
+ // IDENTITY_DOMAIN=example.com IDENTITY_REPO=owner/repo \
6
+ // node generators/gen-identity.mjs
7
+ //
8
+ // Writes:
9
+ // $DIST/.well-known/did.json — minimal did:web:<DOMAIN> document
10
+ // $DIST/api/v1/resume.vc.json — the subject as a W3C VC 2.0; credentialSubject
11
+ // is the input JSON verbatim, issuer is the did
12
+ //
13
+ // Keyless by design: there is no held signing key. The VC's proof is an ENVELOPING
14
+ // Sigstore bundle minted in CI (cosign sign-blob → Fulcio cert from the GitHub
15
+ // Actions OIDC identity → Rekor), served alongside as resume.vc.json.sigstore.json.
16
+ // So the did:web document advertises the Sigstore verification path as a service
17
+ // rather than a static public key — bound to $IDENTITY_REPO's OIDC identity.
18
+ //
19
+ // Site-agnostic injection:
20
+ // $IDENTITY_DOMAIN domain for the DID + site origin (required).
21
+ // $IDENTITY_REPO "owner/repo" for the cert-identity regexp (required).
22
+ // $DIST output dir (default: cwd/dist).
23
+ // $IDENTITY_SUBJECT path to the credentialSubject JSON (default:
24
+ // $DIST/resume.json).
25
+ // $IDENTITY_SUBJECT_SCHEMA optional JSON Schema the subject must satisfy
26
+ // (validated with the kit's schema-validate.mjs).
27
+ // $IDENTITY_VC_NAME VC `name` (default: "<subject.basics.name> — Résumé").
28
+ // $IDENTITY_VC_DESCRIPTION VC `description` (default: generic).
29
+ // $IDENTITY_VALID_FROM_PATH dotted path into the subject for validFrom (default:
30
+ // "meta.lastModified"); omitted if absent.
31
+ import { readFile, writeFile, mkdir } from "node:fs/promises";
32
+ import { dirname, join, resolve } from "node:path";
33
+ import { validateSchema } from "../lib/schema-validate.mjs";
34
+
35
// Required inputs: the domain behind the DID and site origin, and the
// "owner/repo" used for the certificate-identity regexp. Exit 2 when either
// is missing.
const { IDENTITY_DOMAIN: DOMAIN, IDENTITY_REPO: REPO } = process.env;
if (!(DOMAIN && REPO)) {
  console.error("✗ gen-identity: IDENTITY_DOMAIN and IDENTITY_REPO are required");
  process.exit(2);
}

// Identifiers derived from the domain.
const SITE = `https://${DOMAIN}`;
const DID = `did:web:${DOMAIN}`;

// Output directory ($DIST, resolved against the working directory) and the
// optional overrides with their defaults.
const dist = resolve(process.cwd(), process.env.DIST || "dist");
const subjectPath = process.env.IDENTITY_SUBJECT || join(dist, "resume.json");
const validFromPath = process.env.IDENTITY_VALID_FROM_PATH || "meta.lastModified";
44
+
45
// Read a UTF-8 file and parse its contents as JSON.
const readJson = async (p) => {
  const text = await readFile(p, "utf8");
  return JSON.parse(text);
};

// Create the parent directory (recursively), then write `obj` as 2-space
// indented JSON followed by a trailing newline.
const writeJson = async (p, obj) => {
  const parent = dirname(p);
  await mkdir(parent, { recursive: true });
  const payload = `${JSON.stringify(obj, null, 2)}\n`;
  await writeFile(p, payload);
};

// Follow a dotted path ("a.b.c") into `obj`; yields undefined as soon as a
// step lands on null/undefined.
const dotGet = (obj, path) => {
  let cursor = obj;
  for (const key of path.split(".")) cursor = cursor?.[key];
  return cursor;
};
48
+
49
// ---- load the subject ----------------------------------------------------------
const subject = await readJson(subjectPath);
const subjectName = subject?.basics?.name || DOMAIN;
const vcName = process.env.IDENTITY_VC_NAME || `${subjectName} — Résumé`;
const vcDescription = process.env.IDENTITY_VC_DESCRIPTION ||
  `Issued as a Verifiable Credential. The cryptographic proof is an enveloping Sigstore bundle served alongside (resume.vc.json.sigstore.json), keyless and bound to the source repo's GitHub Actions OIDC identity.`;

// ---- did:web document --------------------------------------------------------
// Minimal + honest: no verificationMethod, because there is no held key. The
// assertion path is keyless Sigstore (Fulcio/Rekor), surfaced as a service so a
// verifier knows exactly how to check a credential this DID issues.
const did = {
  "@context": [
    "https://www.w3.org/ns/did/v1",
    "https://w3id.org/security/suites/jws-2020/v1",
  ],
  id: DID,
  controller: DID,
  alsoKnownAs: [SITE, `${SITE}/`],
  service: [
    { id: `${DID}#resume`, type: "VerifiableCredentialService", serviceEndpoint: `${SITE}/api/v1/resume.vc.json` },
    { id: `${DID}#profile`, type: "LinkedDomains", serviceEndpoint: SITE },
    {
      id: `${DID}#sigstore`,
      type: "SigstoreKeylessVerification",
      serviceEndpoint: {
        oidcIssuer: "https://token.actions.githubusercontent.com",
        certificateIdentityRegexp: `^https://github.com/${REPO}/`,
        transparencyLog: "https://rekor.sigstore.dev",
        note: "Credentials are signed with an enveloping Sigstore bundle (e.g. resume.vc.json.sigstore.json), not an embedded key proof.",
      },
    },
  ],
};

// ---- subject as a Verifiable Credential 2.0 ---------------------------------
// credentialSubject is the input JSON VERBATIM (so it keeps satisfying any schema
// it was built against). validFrom is a content fact (a dotted path into the
// subject), never a wall clock — keeps the VC deterministic.
const validFrom = dotGet(subject, validFromPath);
const vc = {
  "@context": ["https://www.w3.org/ns/credentials/v2"],
  id: `${SITE}/api/v1/resume.vc.json`,
  type: ["VerifiableCredential"],
  issuer: DID,
  ...(validFrom ? { validFrom } : {}),
  name: vcName,
  description: vcDescription,
  credentialSubject: subject,
};

// ---- self-checks -------------------------------------------------------------
// Run BEFORE anything is written: a failing check exits 1 without leaving a
// did.json / resume.vc.json in $DIST that did not pass.
if (process.env.IDENTITY_SUBJECT_SCHEMA) {
  const schema = await readJson(process.env.IDENTITY_SUBJECT_SCHEMA);
  const errs = validateSchema(schema, vc.credentialSubject);
  if (errs.length) {
    console.error("✗ VC credentialSubject no longer satisfies its schema:");
    for (const e of errs) console.error(` ${e}`);
    process.exit(1);
  }
}
const vcErrs = [];
if (vc["@context"]?.[0] !== "https://www.w3.org/ns/credentials/v2") vcErrs.push("missing/!first VC 2.0 @context");
if (!Array.isArray(vc.type) || !vc.type.includes("VerifiableCredential")) vcErrs.push("type must include VerifiableCredential");
if (!vc.issuer) vcErrs.push("missing issuer");
if (!vc.credentialSubject) vcErrs.push("missing credentialSubject");
if (did.id !== DID) vcErrs.push("did id mismatch");
if (vcErrs.length) {
  console.error("✗ identity documents malformed:");
  for (const e of vcErrs) console.error(` ${e}`);
  process.exit(1);
}

// ---- write (only reached when every self-check passed) -------------------------
await writeJson(join(dist, ".well-known", "did.json"), did);
await writeJson(join(dist, "api", "v1", "resume.vc.json"), vc);

console.log(`✓ identity: ${DID} → .well-known/did.json · VC 2.0 → api/v1/resume.vc.json (keyless-signed in CI)`);
@@ -0,0 +1,108 @@
1
+ #!/usr/bin/env node
2
+ // Reader-view snapshot generator — for every built page, emit a clean READER
3
+ // extraction (the same Readability engine that powers Firefox/Safari Reader) as
4
+ // both HTML and Markdown. The Markdown is the durable, analysis-friendly twin of
5
+ // the page: machine-readable, diffable, and far easier to run NLP / LLM analysis
6
+ // over than scraping live HTML — and it doubles as the AI-readable Markdown sibling
7
+ // (`semantic.ai-readability`). A non-empty extraction is also the PROOF of the
8
+ // "reader survivability" the structure-audit grades (`readerOk`).
9
+ //
10
+ // node generators/gen-snapshots.mjs [distDir] # write <page>.reader.{html,md}
11
+ //
12
+ // Pure (no browser, no network): linkedom parses the DOM, @mozilla/readability
13
+ // extracts the article, turndown renders Markdown. (The PRINTED/PDF view needs a
14
+ // real print-CSS renderer — tezcatl --pdf locally — and is a separate generator.)
15
+ //
16
+ // Config-driven; NOTHING about any one site is hard-coded:
17
+ // argv[2] / $SNAPSHOT_DIST built output dir (default: "dist")
18
+ // $SNAPSHOT_PAGES comma list of page paths under dist (default: every *.html)
19
+ // $SNAPSHOT_BASE_URL site origin, recorded as `source` in the front-matter
20
+ // $SNAPSHOT_SUFFIX output basename suffix (default: ".reader")
21
+ //
22
+ // The pure extract/markdown functions are exported for unit testing.
23
import { writeFile, readFile, readdir, access } from "node:fs/promises";
import { resolve, join, relative, dirname, basename, extname } from "node:path";
import { pathToFileURL } from "node:url";
import { parseHTML } from "linkedom";
import { Readability } from "@mozilla/readability";
import TurndownService from "turndown";
28
+
29
+ // ── Pure core (browser-free; unit-testable) ──────────────────────────────────
30
+
31
/**
 * Produce the reader-view extraction of an HTML document.
 *
 * @param {string} html - the page markup, parsed with linkedom.
 * @param {{ url?: string }} [options] - `url` is copied into the result as-is.
 * @returns {object|null} the extraction (url, title, byline, excerpt, siteName,
 *   length, contentHtml, text), or null when Readability yields no article
 *   content (e.g. a nav-only or empty page).
 */
export function extractReader(html, { url = "" } = {}) {
  const article = new Readability(parseHTML(html).document).parse();
  if (!article?.content) return null;

  const { title, byline, excerpt, siteName, length, content, textContent } = article;
  return {
    url,
    title: title || "",
    byline: byline || "",
    excerpt: excerpt || "",
    siteName: siteName || "",
    length: length || 0,
    contentHtml: content,
    text: textContent || "",
  };
}
48
+
49
// Shared HTML→Markdown converter: ATX headings, fenced code blocks, "-" bullets.
const turndown = new TurndownService({
  headingStyle: "atx",
  codeBlockStyle: "fenced",
  bulletListMarker: "-",
});

/**
 * Render a reader extraction as Markdown preceded by a small YAML front-matter.
 *
 * `title` is always emitted; `byline`, `excerpt` and `source` only when the
 * corresponding field is truthy. Title, byline and excerpt are JSON-quoted;
 * the source URL is written as-is.
 *
 * @param {object} reader - an extraction as returned by extractReader().
 * @returns {string} front-matter, a blank line, the trimmed Markdown body and a
 *   trailing newline.
 */
export function toMarkdown(reader) {
  const quoted = (value) => JSON.stringify(String(value));

  const frontMatter = ["---", `title: ${quoted(reader.title)}`];
  if (reader.byline) frontMatter.push(`byline: ${quoted(reader.byline)}`);
  if (reader.excerpt) frontMatter.push(`excerpt: ${quoted(reader.excerpt)}`);
  if (reader.url) frontMatter.push(`source: ${reader.url}`);
  frontMatter.push("---");

  const body = turndown.turndown(reader.contentHtml).trim();
  return `${frontMatter.join("\n")}\n\n${body}\n`;
}
65
+
66
+ // ── Impure runner ────────────────────────────────────────────────────────────
67
+
68
/**
 * Recursively collect the paths of every file whose name ends in ".html".
 *
 * @param {string} dir - directory to scan.
 * @param {string} [base=dir] - forwarded to recursive calls; not otherwise used.
 * @returns {Promise<string[]>} matching paths, each `dir` joined with the
 *   entry's relative location, in directory-listing order.
 */
async function walkHtml(dir, base = dir) {
  const entries = await readdir(dir, { withFileTypes: true });
  const found = [];
  for (const entry of entries) {
    const full = join(dir, entry.name);
    if (entry.isDirectory()) {
      const nested = await walkHtml(full, base);
      for (const file of nested) found.push(file);
    } else if (entry.name.endsWith(".html")) {
      found.push(full);
    }
  }
  return found;
}
77
+
78
+ // ── CLI ──────────────────────────────────────────────────────────────────────
79
+
80
/**
 * CLI entry point: write `<page><suffix>.html` and `<page><suffix>.md` next to
 * every selected page under the dist directory.
 *
 * Inputs: argv[2] (unless it starts with "--") or $SNAPSHOT_DIST for the dist
 * dir (default "dist"); $SNAPSHOT_PAGES, $SNAPSHOT_BASE_URL, $SNAPSHOT_SUFFIX.
 * Exits 2 when the dist dir does not exist. Pages with no article content are
 * reported and skipped.
 */
async function main() {
  const distArg = process.argv[2];
  const dist = resolve(distArg && !distArg.startsWith("--") ? distArg : process.env.SNAPSHOT_DIST || "dist");
  const exists = async (p) => { try { await access(p); return true; } catch { return false; } };
  if (!(await exists(dist))) { console.error(`✗ gen-snapshots: ${dist} not found — build first.`); process.exit(2); }

  const suffix = process.env.SNAPSHOT_SUFFIX || ".reader";
  const baseUrl = (process.env.SNAPSHOT_BASE_URL || "").replace(/\/$/, "");
  const requested = (process.env.SNAPSHOT_PAGES || "").split(",").map((s) => s.trim().replace(/^\//, "")).filter(Boolean);

  // Default selection is every *.html under dist EXCEPT this generator's own
  // earlier outputs (<page><suffix>.html). Without that filter a second run over
  // the same dist would snapshot the snapshots (<page>.reader.reader.html, …).
  // An explicit $SNAPSHOT_PAGES list is honoured exactly as given.
  const ownOutputEnding = `${suffix}.html`;
  const pages = requested.length
    ? requested.map((p) => resolve(dist, p))
    : (await walkHtml(dist)).filter((file) => !file.endsWith(ownOutputEnding)).sort();

  let wrote = 0;
  let skipped = 0;
  for (const file of pages) {
    const rel = relative(dist, file);
    // Public URL of the page: drop a trailing "index.html", else the ".html" extension.
    const url = baseUrl ? `${baseUrl}/${rel.replace(/index\.html$/, "").replace(/\.html$/, "")}` : "";
    const reader = extractReader(await readFile(file, "utf8"), { url });
    if (!reader) { console.error(` · skipped ${rel} (no article content)`); skipped++; continue; }
    const stem = join(dirname(file), basename(file, extname(file)) + suffix);
    await writeFile(`${stem}.html`, reader.contentHtml.trim() + "\n");
    await writeFile(`${stem}.md`, toMarkdown(reader));
    console.log(` ✓ ${rel} → ${relative(dist, stem)}.{html,md} (${reader.length} chars)`);
    wrote++;
  }
  console.log(`✓ gen-snapshots: ${wrote} reader snapshot(s) written${skipped ? `, ${skipped} skipped` : ""}.`);
}
104
+
105
// Only run the CLI when invoked directly (not when imported by a test).
// import.meta.url is a percent-encoded, platform-normalised file URL, so it is
// compared against pathToFileURL(argv[1]) rather than a hand-built "file://"
// string — the latter never matches for paths containing spaces or other
// characters that need escaping, nor for Windows paths.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  main().catch((e) => { console.error("✗ gen-snapshots: error —", e.stack || e.message); process.exit(1); });
}
@@ -0,0 +1,61 @@
1
+ // generators/openapi.mjs — the GENERIC, reusable core of a static-API generator.
2
+ //
3
+ // A site's static JSON API is intrinsically site-specific (its endpoints project
4
+ // THAT site's contracts: profile, posts, corpus, a résumé VC, …). So the per-endpoint
5
+ // projection stays in the consuming site's build. What IS reusable — and lives here —
6
+ // is the machinery around it:
7
+ //
8
+ // sortKeys(value) deterministic byte output: recursively sort object
9
+ // keys (arrays keep order).
10
+ // writeApiFile(apiDir, rel, obj, {sort})
11
+ // write a JSON file under the API tree (mkdir -p),
12
+ // key-sorted by default, with a trailing newline.
13
+ // embedSchema(schema) strip $schema/$id so a JSON-Schema component can be
14
+ // embedded under an OpenAPI components/schemas entry
15
+ // whose internal "#/…" refs resolve at the document root.
16
+ // jsonResponse(ref) a 200 application/json response referencing a schema.
17
+ // validateOpenapi(openapi) OpenAPI 3.1/3.2 well-formedness: version, info, ≥1
18
+ // path, every operation carries responses, every local
19
+ // "#/components/…" $ref resolves. Returns string[]
20
+ // (empty = well-formed).
21
+ //
22
+ // Pair with lib/schema-validate.mjs to self-check that each emitted document
23
+ // validates against the schema its OpenAPI operation advertises. Zero deps.
24
+ import { writeFile, mkdir } from "node:fs/promises";
25
+ import { dirname, join } from "node:path";
26
+
27
/**
 * Return a copy of `v` whose object keys are sorted at every depth, for
 * deterministic serialization. Arrays keep their element order; non-object
 * values are returned unchanged.
 */
export const sortKeys = (v) => {
  if (Array.isArray(v)) return v.map((item) => sortKeys(item));
  if (!v || typeof v !== "object") return v;
  const entries = Object.keys(v).sort().map((key) => [key, sortKeys(v[key])]);
  return Object.fromEntries(entries);
};
30
+
31
/**
 * Write `obj` as 2-space-indented JSON (plus a trailing newline) to `rel` under
 * `apiDir`, creating parent directories as needed.
 *
 * @param {string} apiDir - root of the API tree.
 * @param {string} rel - path of the file relative to `apiDir`.
 * @param {*} obj - value to serialize.
 * @param {{ sort?: boolean }} [options] - key-sort the value first (default true).
 * @returns {Promise<string>} the path that was written.
 */
export const writeApiFile = async (apiDir, rel, obj, { sort = true } = {}) => {
  const target = join(apiDir, rel);
  await mkdir(dirname(target), { recursive: true });
  const doc = sort ? sortKeys(obj) : obj;
  await writeFile(target, `${JSON.stringify(doc, null, 2)}\n`);
  return target;
};
37
+
38
// Return a shallow copy of schema `s` without its dialect ($schema) and identity
// ($id) keys, so the component's own "#/…" pointers resolve against the OpenAPI
// document root once embedded. A resource with internal JSON-pointer refs (e.g.
// draft-04 "#/definitions/…") should have an $id re-added by the caller so those
// refs resolve WITHIN the embedded resource. The input object is not modified.
export const embedSchema = (s) => {
  const { $schema: _dialect, $id: _identity, ...embeddable } = s;
  return embeddable;
};
43
+
44
// Build a "200 OK" application/json response object whose schema is `{ $ref: ref }`.
export const jsonResponse = (ref) => {
  const schema = { $ref: ref };
  return {
    description: "OK",
    content: { "application/json": { schema } },
  };
};
45
+
46
/**
 * Check an OpenAPI 3.1/3.2 document for basic well-formedness.
 *
 * Verifies: the version string is 3.1.x or 3.2.x; info.title and info.version
 * are present; there is at least one path; every operation carries `responses`;
 * every local "#/components/…" $ref resolves inside the document.
 *
 * Only the Path Item fields that hold an Operation Object are treated as
 * operations (the HTTP-method fields, plus the entries of 3.2's
 * `additionalOperations`); sibling fields such as `summary`, `description`,
 * `parameters`, `servers` and `x-*` extensions are not operations and are
 * ignored here.
 *
 * @param {object} openapi - the OpenAPI document.
 * @returns {string[]} human-readable problems; empty when well-formed.
 */
export function validateOpenapi(openapi) {
  // "query" is a fixed Path Item field as of OpenAPI 3.2.
  const operationFields = new Set(["get", "put", "post", "delete", "options", "head", "patch", "trace", "query"]);
  const errs = [];

  if (!/^3\.[12]\.\d+$/.test(openapi.openapi || "")) errs.push(`openapi version ${openapi.openapi} is not 3.1/3.2`);
  if (!openapi.info?.title || !openapi.info?.version) errs.push("info.title/version missing");
  if (!openapi.paths || !Object.keys(openapi.paths).length) errs.push("no paths");

  // Collect every local $ref by walking the document with a stringify replacer.
  const refs = new Set();
  JSON.stringify(openapi, (k, v) => { if (k === "$ref" && typeof v === "string" && v.startsWith("#/")) refs.add(v); return v; });

  for (const [p, item] of Object.entries(openapi.paths || {})) {
    // A malformed path item is reported, not allowed to throw.
    if (!item || typeof item !== "object") { errs.push(`path ${p} is not an object`); continue; }
    for (const [key, value] of Object.entries(item)) {
      let operations = [];
      if (operationFields.has(key)) operations = [[key, value]];
      else if (key === "additionalOperations" && value && typeof value === "object") operations = Object.entries(value);
      for (const [m, op] of operations) {
        if (!op?.responses) errs.push(`${m.toUpperCase()} ${p} has no responses`);
      }
    }
  }

  // Only OpenAPI-level component refs resolve against the document root; schema-internal
  // JSON-pointer refs (e.g. "#/definitions/…") resolve within their own $id'd resource.
  for (const ref of [...refs].filter((r) => r.startsWith("#/components/"))) {
    const node = ref
      .replace(/^#\//, "")
      .split("/")
      // JSON Pointer escapes: "~1" is "/", "~0" is "~" (in that order).
      .map((seg) => seg.replace(/~1/g, "/").replace(/~0/g, "~"))
      .reduce((o, seg) => o?.[seg], openapi);
    if (node == null) errs.push(`dangling $ref ${ref}`);
  }
  return errs;
}
@@ -0,0 +1,137 @@
1
+ #!/usr/bin/env node
2
+ // integrity · gen-provenance — emit $DIST/provenance.json (+ the /rekor sidecar)
3
+ // for the ENTIRE site. Run at deploy time, after the keyless signing steps, with
4
+ // the GitHub Actions OIDC env in scope.
5
+ //
6
+ // node integrity/gen-provenance.mjs
7
+ //
8
+ // Keyless attestations (GitHub Actions OIDC → Fulcio → Rekor, no stored key):
9
+ // 1. site manifest — cosign sign-blob over $DIST/site.sha256 (the whole
10
+ // served site). Verify the live bytes in place.
11
+ // 2. in-toto statement — cosign sign-blob over $DIST/attestation.intoto.json
12
+ // (the SLSA predicate), IF present (some sites emit one, some don't).
13
+ // 3. OCI artifact — the built site pushed to GHCR + cosign-signed by digest.
14
+ // Proves WHO built the site and that it is intact — not that the build was safe or
15
+ // authorized. The signatures + Rekor entries are ground truth; this file is a
16
+ // convenience view, and the `verify` recipes confirm it independently.
17
+ //
18
+ // Site-agnostic: every site value comes from the GitHub Actions env (GITHUB_*),
19
+ // the OCI_* env, $PROVENANCE_DOC_URL (the caveat link), and $DIST. The emitted
20
+ // builder.repository becomes the identity verify-site.mjs / verify.mjs enforce —
21
+ // nothing is hardcoded.
22
+ import { readFile, writeFile, mkdir, access } from "node:fs/promises";
23
+ import { createHash } from "node:crypto";
24
+ import { join, resolve } from "node:path";
25
+
26
// $DIST may be absolute or relative-to-cwd (resolve handles both); default ./dist.
const dist = resolve(process.cwd(), process.env.DIST || "dist");

// Resolves true when `p` can be accessed, false otherwise (never rejects).
const exists = (p) => access(p).then(() => true, () => false);

// Builder identity and artifact coordinates, all taken from the environment;
// every value falls back to the empty string when unset.
const {
  GITHUB_REPOSITORY: repo = "",
  GITHUB_SHA: sha = "",
  GITHUB_REF: ref = "",
  GITHUB_RUN_ID: runId = "",
  GITHUB_WORKFLOW_REF: workflowRef = "",
  OCI_REF: ociRef = "",
  OCI_DIGEST: ociDigest = "",
  PROVENANCE_DOC_URL: docUrl = "",
} = process.env;

// The two cosign identity flags shared by every `verify` recipe, joined by a
// shell line continuation.
const idFlags = [
  ` --certificate-identity-regexp '^https://github.com/${repo}/'`,
  ` --certificate-oidc-issuer https://token.actions.githubusercontent.com`,
].join(" \\\n");
42
+
43
/**
 * Look up the Rekor log index recorded in a Sigstore bundle under $DIST.
 *
 * @param {string} bundleName - bundle file name, relative to the dist dir.
 * @returns {Promise<string|null>} the first tlog entry's logIndex as a string,
 *   or null when the bundle is missing, cannot be read or parsed, or carries no
 *   log index.
 */
async function rekorIndex(bundleName) {
  const bundlePath = join(dist, bundleName);
  if (!(await exists(bundlePath))) return null;

  let bundle;
  try {
    bundle = JSON.parse(await readFile(bundlePath, "utf8"));
  } catch {
    return null;
  }
  const logIndex = bundle?.verificationMaterial?.tlogEntries?.[0]?.logIndex;
  return logIndex != null ? String(logIndex) : null;
}
52
+
53
// Digest the signed site manifest and count its non-empty lines (one per file).
const manifestBytes = await readFile(join(dist, "site.sha256"));
const manifestSha256 = createHash("sha256").update(manifestBytes).digest("hex");
const fileCount = manifestBytes.toString("utf8").trim().split("\n").filter(Boolean).length;
const manifestIdx = await rekorIndex("site.sha256.sigstore.json");
const attIdx = await rekorIndex("attestation.intoto.json.sigstore.json");

// Repository part of $OCI_REF, used to build the digest-pinned verify target.
// Strip any "@digest" first, then only a ":tag" that follows the last "/", so a
// registry port ("host:5000/repo:tag") or a ref that already carries a digest
// is not truncated or doubled.
const ociRepository = (() => {
  const withoutDigest = ociRef.split("@")[0];
  const tagAt = withoutDigest.indexOf(":", withoutDigest.lastIndexOf("/") + 1);
  return tagAt === -1 ? withoutDigest : withoutDigest.slice(0, tagAt);
})();

const provenance = {
  scope: "entire-site",
  fileCount,
  // Machine-readable freshness. The authoritative timestamp is the Rekor entry's
  // integratedTime (one click away at /rekor) — this is when the build that
  // produced these bytes ran, surfaced so a verifier can report build age.
  builtAt: new Date().toISOString(),
  builder: {
    repository: repo,
    commit: sha,
    ref,
    runId,
    workflowRef,
    issuer: "https://token.actions.githubusercontent.com",
  },
  siteManifest: {
    file: "site.sha256",
    sha256: manifestSha256,
    bundle: "site.sha256.sigstore.json",
    transparencyLog: "rekor.sigstore.dev",
    rekorLogIndex: manifestIdx,
    rekorEntry: manifestIdx ? `https://search.sigstore.dev/?logIndex=${manifestIdx}` : null,
    verify:
      `cosign verify-blob \\\n --bundle site.sha256.sigstore.json \\\n${idFlags} \\\n site.sha256\n` +
      `# then check the live bytes against the signed manifest:\nsha256sum -c site.sha256`,
  },
  // Present only when the site emitted an in-toto statement.
  intotoStatement: (await exists(join(dist, "attestation.intoto.json")))
    ? {
        file: "attestation.intoto.json",
        bundle: "attestation.intoto.json.sigstore.json",
        predicateType: "https://slsa.dev/provenance/v1",
        rekorLogIndex: attIdx,
        rekorEntry: attIdx ? `https://search.sigstore.dev/?logIndex=${attIdx}` : null,
        verify: `cosign verify-blob \\\n --bundle attestation.intoto.json.sigstore.json \\\n${idFlags} \\\n attestation.intoto.json`,
      }
    : null,
  // Present only when $OCI_REF is set; verification pins the digest when one is known.
  ociArtifact: ociRef
    ? {
        registry: "ghcr.io",
        ref: ociRef,
        digest: ociDigest || null,
        pull: `oras pull ${ociRef}`,
        verify: `cosign verify ${ociDigest ? `${ociRepository}@${ociDigest}` : ociRef} \\\n${idFlags}`,
      }
    : null,
  caveat:
    "Provenance proves who built this site and that it is intact — not that the build was safe or authorized. Identity and integrity, not legitimacy." +
    (docUrl ? ` ${docUrl}` : ""),
};

await writeFile(join(dist, "provenance.json"), JSON.stringify(provenance, null, 2) + "\n");
110
+
111
// /rekor — a stable, one-click redirect to THIS build's real Rekor entry (the
// whole-site manifest signature). The signed HTML can't bake the per-version
// logIndex without circularity, so this unsigned sidecar carries it (excluded
// from site.sha256). The target is the real search.sigstore.dev entry showing the
// cert identity + digest — a wrong index wouldn't match, so it degrades detectably.
// Written only when the manifest bundle yielded a log index.
if (manifestIdx) {
  const rekorUrl = `https://search.sigstore.dev/?logIndex=${manifestIdx}`;
  const rekorDir = join(dist, "rekor");
  const html = `<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="robots" content="noindex">
<meta http-equiv="refresh" content="0;url=${rekorUrl}">
<title>This build's Rekor entry</title>
<script>location.replace(${JSON.stringify(rekorUrl)})</script>
</head>
<body style="font-family:system-ui,sans-serif;margin:2rem;line-height:1.5;">
<p>Redirecting to this build's entry in the public Rekor transparency log…</p>
<p><a href="${rekorUrl}">${rekorUrl}</a></p>
</body>
</html>
`;
  await mkdir(rekorDir, { recursive: true });
  await writeFile(join(rekorDir, "index.html"), html);
}

// One-line summary of what was emitted; optional parts collapse to "".
const summary = [
  `✓ provenance: entire site (${fileCount} files)`,
  ` · manifest sha256:${manifestSha256.slice(0, 12)}…`,
  ` · rekor#${manifestIdx ?? "?"}`,
  manifestIdx ? " · /rekor → entry" : "",
  ociRef ? ` · oci ${ociRef}` : "",
  " → provenance.json",
].join("");
console.log(summary);
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env node
2
+ // integrity · gen-sitemanifest — content-address the ENTIRE built site.
3
+ //
4
+ // Walk $DIST (default ./dist relative to the working directory) and emit
5
+ // `$DIST/site.sha256`: one `sha256␠␠relpath` line per served file, sorted, in the
6
+ // exact format `sha256sum -c` accepts. This single file is the whole-site digest
7
+ // the deploy keyless-signs (cosign sign-blob), so provenance covers every asset.
8
+ // A visitor verifies the signature on this manifest, then checks the live bytes.
9
+ //
10
+ // node integrity/gen-sitemanifest.mjs # uses ./dist
11
+ // DIST=out node integrity/gen-sitemanifest.mjs
12
+ // MANIFEST_EXCLUDE=_worker.js,_extra node integrity/gen-sitemanifest.mjs
13
+ //
14
+ // Site-agnostic: dist resolved from cwd (not the file's location), so it runs
15
+ // identically whether invoked in-repo or vendored. The provenance sidecars are
16
+ // excluded — they describe the site, they are not the site, and the manifest can't
17
+ // hash its own signature. EXCLUDE is a superset of both reference sites' sidecars
18
+ // (a name that doesn't exist in a given site is simply never matched); a consumer
19
+ // adds platform control files of its own via $MANIFEST_EXCLUDE (comma-separated).
20
+ import { readdir, readFile, writeFile } from "node:fs/promises";
21
+ import { createHash } from "node:crypto";
22
+ import { join, relative, resolve } from "node:path";
23
+
24
// $DIST may be absolute or relative-to-cwd (resolve handles both); default ./dist.
const dist = resolve(process.cwd(), process.env.DIST || "dist");

// Dist-relative paths that are left out of the manifest.
const EXCLUDE = new Set([
  // Provenance sidecars: they describe the site, they are not the site, and the
  // manifest can't hash its own signature.
  "site.sha256",
  "site.sha256.sigstore.json",
  "provenance.json",
  "rekor/index.html",
  "attestation.intoto.json",
  "attestation.intoto.json.sigstore.json",
  // Platform control files: consumed by the host (e.g. Cloudflare Pages), never
  // served as content — so they don't belong in a manifest of SERVED bytes (a
  // verifier re-hashing the live site 404s on them). Still covered by the OCI
  // artifact signature, which packs the whole dist.
  "_headers",
  "_redirects",
  "_routes.json",
]);

// Consumer-supplied additions: comma-separated, trimmed, empty items ignored.
for (const extra of (process.env.MANIFEST_EXCLUDE || "").split(",")) {
  const name = extra.trim();
  if (name) EXCLUDE.add(name);
}
43
+
44
/**
 * Recursively list the files under `dir` as paths relative to the dist root,
 * leaving out any whose relative path is in EXCLUDE. Directories are descended
 * into, never listed themselves.
 *
 * @param {string} dir - directory to scan.
 * @returns {Promise<string[]>} dist-relative file paths, in directory-listing order.
 */
async function walk(dir) {
  const entries = await readdir(dir, { withFileTypes: true });
  const collected = [];
  for (const entry of entries) {
    const absolute = join(dir, entry.name);
    if (entry.isDirectory()) {
      collected.push(...(await walk(absolute)));
    } else {
      const relPath = relative(dist, absolute);
      if (!EXCLUDE.has(relPath)) collected.push(relPath);
    }
  }
  return collected;
}
55
+
56
// Hash every served file (sorted by relative path) and write the manifest.
const files = (await walk(dist)).sort();

// Each manifest line is "<hex digest><two spaces><relpath>" — the text-mode
// format `sha256sum -c` reads. The separator is built with repeat() so its
// width is explicit and cannot be lost to whitespace collapsing.
const FIELD_SEPARATOR = " ".repeat(2);

const lines = [];
for (const rel of files) {
  const sha256 = createHash("sha256").update(await readFile(join(dist, rel))).digest("hex");
  lines.push(`${sha256}${FIELD_SEPARATOR}${rel}`);
}
const manifest = lines.join("\n") + "\n";
await writeFile(join(dist, "site.sha256"), manifest);

// The digest of the manifest itself is the single whole-site digest.
const siteDigest = createHash("sha256").update(manifest).digest("hex");
console.log(`✓ site manifest: ${files.length} files → ${process.env.DIST || "dist"}/site.sha256 (site digest sha256:${siteDigest.slice(0, 12)}…)`);
@@ -0,0 +1,131 @@
1
+ #!/usr/bin/env node
2
+ // integrity · http-probe — a post-deploy RFC 9110 HTTP-correctness probe.
3
+ //
4
+ // node integrity/http-probe.mjs https://your-site.example
5
+ // PROBE_CONFIG=probe.json node integrity/http-probe.mjs https://your-site.example
6
+ //
7
+ // Sibling to verify-site.mjs (which checks the SIGNED BYTES) — this checks the EDGE'S
8
+ // HTTP SEMANTICS: that the deployed origin speaks HTTP correctly per RFC 9110/9111.
9
+ // It runs AFTER the site is live; it is a post-deploy probe, NOT a build gate (it
10
+ // needs a live URL). Fail-closed: any wrong status / type / parity / conditional
11
+ // behaviour exits 1. Dependency-free (node fetch only).
12
+ //
13
+ // What it asserts (RFC 9110, and 9111 for conditional caching):
14
+ // 1. status — each indexable route returns 200; a known-missing path returns 404.
15
+ // 2. type — Content-Type is correct + carries a charset for text.
16
+ // 3. HEAD parity — HEAD mirrors GET's status + Content-Type and returns no body (§9.3.2).
17
+ // 4. conditional — when GET returns an ETag, a follow-up If-None-Match yields 304 with
18
+ // no body (§13.1.2 / RFC 9111). Skipped (with a note) if no ETag is served.
19
+ // 5. 404 page — the unknown path serves the site's 404 document.
20
+ // 6. redirects terminate — routes that 3xx must reach a terminal 2xx within a hop cap
21
+ // (no loops); the canonical apex host must not itself redirect.
22
+ //
23
+ // Site-agnostic: the routes to probe come from a config (NO hardcoded paths). Supply
24
+ // EITHER a JSON file via $PROBE_CONFIG / 2nd positional arg, OR the env vars
25
+ // $PROBE_HTML_ROUTES + $PROBE_MISSING (comma lists). Config shape:
26
+ // { "htmlRoutes": ["/", "/about"],
27
+ // "typed": [ { "path": "/robots.txt", "type": "text/plain", "charset": true },
28
+ // { "path": "/sitemap.xml", "type": "xml" } ],
29
+ // "missing": "/this-should-404" }
30
+ // With no config at all, only the apex (/) is probed (status + type + HEAD parity).
31
+ import { argv, exit, env } from "node:process";
32
+ import { readFile } from "node:fs/promises";
33
+
34
// First positional argument: the origin to probe. It must be present and start
// with http:// or https://; otherwise print usage and exit 2.
const [, , target] = argv;
const isHttpUrl = Boolean(target) && /^https?:\/\//.test(target);
if (!isHttpUrl) {
  console.error("usage: http-probe <https://site> [config.json]");
  exit(2);
}

// Origin without a single trailing slash, so routes can be appended directly.
const base = target.replace(/\/$/, "");
40
+
41
/**
 * Load the probe configuration.
 *
 * A JSON file named by the second positional argument or $PROBE_CONFIG wins;
 * if it cannot be read or parsed, an error is printed and the process exits 2.
 * Otherwise the config is assembled from $PROBE_HTML_ROUTES (comma list,
 * default ["/"]) and $PROBE_MISSING, with no typed assets.
 *
 * @returns {Promise<object>} the parsed file, or { htmlRoutes, typed, missing }.
 */
async function loadConfig() {
  const configPath = argv[3] || env.PROBE_CONFIG;
  if (configPath) {
    try {
      const raw = await readFile(configPath, "utf8");
      return JSON.parse(raw);
    } catch (e) {
      console.error(`✗ http-probe: cannot read config ${configPath}: ${e.message}`);
      exit(2);
    }
  }

  const routes = (env.PROBE_HTML_ROUTES || "")
    .split(",")
    .map((s) => s.trim())
    .filter(Boolean);
  return {
    htmlRoutes: routes.length ? routes : ["/"],
    typed: [],
    missing: env.PROBE_MISSING || "/this-path-should-never-exist-12345",
  };
}

// Effective settings; each falls back to its default when the config omits it.
const cfg = await loadConfig();
const { htmlRoutes, typed, missing } = cfg;
const HTML_ROUTES = htmlRoutes || ["/"];
const TYPED = typed || [];
const MISSING = missing || "/this-path-should-never-exist-12345";
58
+
59
// Number of failed assertions so far; decides the exit code at the end.
let failures = 0;

// Print one check result (✓ / ✗ plus message) and count it when it failed.
const ok = (cond, msg) => {
  const mark = cond ? "✓" : "✗";
  console.log(`${mark} ${msg}`);
  if (!cond) failures += 1;
};

// Print an informational line that does not affect the outcome.
const note = (msg) => console.log(` · ${msg}`);

// Lower-cased Content-Type header of a response ("" when absent).
const ct = (res) => {
  const header = res.headers.get("content-type");
  return (header || "").toLowerCase();
};
63
+
64
/**
 * Run every probe against `base` in order — HTML routes, typed assets, the
 * missing path, then the redirect chain from "/" — print the verdict and exit
 * 1 when any check failed, 0 otherwise.
 */
async function main() {
  console.log(`· http-probe: ${base} (RFC 9110 correctness)`);

  // Checks 1–4 for one HTML route: status, type, HEAD parity, conditional request.
  const probeHtmlRoute = async (path) => {
    const url = `${base}${path}`;
    const get = await fetch(url, { redirect: "follow" });
    const getType = ct(get);
    ok(get.status === 200, `GET ${path} → ${get.status} (want 200)`);
    ok(/text\/html/.test(getType), `GET ${path} Content-Type ${getType || "(none)"} (want text/html)`);
    // charset on HTML is RECOMMENDED, not required — HTML declares it in-band via
    // <meta charset>, and some asset edges omit it on text/html. Note, don't fail.
    if (!/charset=/.test(getType)) note(`GET ${path}: no charset in Content-Type (HTML declares it via <meta charset>)`);

    // HEAD parity (§9.3.2): same status + Content-Type, empty body.
    const head = await fetch(url, { method: "HEAD", redirect: "follow" });
    ok(head.status === get.status, `HEAD ${path} status ${head.status} == GET ${get.status}`);
    ok(ct(head) === getType, `HEAD ${path} Content-Type matches GET`);
    const headBody = await head.text();
    ok(headBody.length === 0, `HEAD ${path} returns no body (${headBody.length} bytes)`);

    // Conditional request (§13.1.2 / RFC 9111): ETag → If-None-Match → 304.
    const etag = get.headers.get("etag");
    if (!etag) {
      note(`GET ${path}: no ETag served — conditional-request check skipped`);
      return;
    }
    const inm = await fetch(url, { headers: { "If-None-Match": etag }, redirect: "follow" });
    ok(inm.status === 304, `GET ${path} If-None-Match(${etag.slice(0, 12)}…) → ${inm.status} (want 304)`);
    const body304 = await inm.text();
    ok(body304.length === 0, `304 ${path} carries no body`);
  };

  // 1 + 2 + 3 + 4: HTML routes, one after another.
  for (const path of HTML_ROUTES) {
    await probeHtmlRoute(path);
  }

  // 2: typed non-HTML assets.
  for (const entry of TYPED) {
    const { path, type, charset, skip } = entry;
    if (skip) continue;
    const res = await fetch(`${base}${path}`, { redirect: "follow" });
    const resType = ct(res);
    ok(res.status === 200, `GET ${path} → ${res.status} (want 200)`);
    if (type) ok(resType.includes(type), `GET ${path} Content-Type ${resType || "(none)"} (want *${type}*)`);
    if (charset) ok(/charset=/.test(resType), `GET ${path} declares a charset`);
  }

  // 5: 404 handling — unknown path → 404, serving the site's 404 document.
  const miss = await fetch(`${base}${MISSING}`, { redirect: "follow" });
  const missType = ct(miss);
  ok(miss.status === 404, `GET ${MISSING} → ${miss.status} (want 404)`);
  ok(/text\/html/.test(missType), `404 Content-Type ${missType || "(none)"} (want text/html)`);

  // 6: redirects terminate (no loops) within a small hop cap; the apex must not redirect.
  const HOP_CAP = 5;
  const apexUrl = `${base}/`;
  const apex = await fetch(apexUrl, { redirect: "manual" });
  ok(apex.status < 300 || apex.status >= 400, `apex / does not redirect (status ${apex.status})`);

  // Follow Location headers by hand until a non-redirect answer or the cap is exceeded.
  let hops = 0;
  let cur = apexUrl;
  let terminal = null;
  while (terminal === null && hops <= HOP_CAP) {
    const r = await fetch(cur, { redirect: "manual" });
    const isRedirect = r.status >= 300 && r.status < 400;
    const location = isRedirect ? r.headers.get("location") : null;
    if (location) {
      cur = new URL(location, cur).toString();
      hops += 1;
    } else {
      terminal = r.status;
    }
  }
  ok(terminal !== null && hops <= HOP_CAP, `redirect chain from / terminates in ${hops} hop(s) → ${terminal ?? "(loop)"}`);

  console.log(failures ? `\n✗ http-probe FAILED (${failures})` : `\n✓ http-probe: edge HTTP semantics conform to RFC 9110`);
  exit(failures ? 1 : 0);
}

// Any unexpected error (e.g. a network failure) is reported and fails the probe.
main().catch((e) => {
  console.error("✗ http-probe: error —", e.message);
  exit(1);
});