@bounded-systems/conformance-kit 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/gates/jargon-gate.mjs +159 -0
- package/integrity/verify/package-lock.json +1207 -0
- package/integrity/verify/package.json +10 -0
- package/integrity/verify/verify.mjs +8 -3
- package/package.json +3 -1
package/README.md
CHANGED
|
@@ -12,7 +12,7 @@ hardcodes `robertdelanghe.dev`, `bounded.tools`, an account, or an email.
|
|
|
12
12
|
|
|
13
13
|
```
|
|
14
14
|
integrity/ verify-site · verify (sigstore) · gen-sitemanifest · gen-provenance · structure-audit · http-probe
|
|
15
|
-
gates/ sbom (gen + completeness) · shacl-runner · seo-gate · axe-gate (axe-core a11y) · vuln-gate (npm audit) · html-validator-gate (vnu) · baseline-gate (web-features) · readability-gate · commonmark-runner · semantic (lone)
|
|
15
|
+
gates/ sbom (gen + completeness) · shacl-runner · seo-gate · axe-gate (axe-core a11y) · vuln-gate (npm audit) · html-validator-gate (vnu) · baseline-gate (web-features) · jargon-gate (plain-language) · readability-gate · commonmark-runner · semantic (lone)
|
|
16
16
|
gates/conformance/ conformance-report — lone's conformance() projection (Node port of jsr:@bounded-systems/lone@0.4) + a generic HTML renderer
|
|
17
17
|
generators/ gen-cid (IPFS UnixFS) · gen-identity (did:web + VC) · gen-snapshots (reader/markdown) · openapi (static-API helper core)
|
|
18
18
|
emitters/ reprDigest (RFC 9530) · securityTxt (RFC 9116) · webManifest · markdown-sibling headers
|
|
@@ -77,6 +77,7 @@ in-process verifier). The Deno semantic runner pins its imports in
|
|
|
77
77
|
| `vuln-gate.mjs` | `node …/vuln-gate.mjs [projectDir]` | `$VULN_ROOT` (lockfile lives here, default `.`). Optional `$VULN_OMIT_DEV` (`true`→production deps only, default `true`), `$VULN_THRESHOLD` (highest tolerated known critical/high, default `0`), `$VULN_REPORT` (write the JSON report). Runs **`npm audit`** and **fails closed** when the known critical/high count exceeds the threshold. The report's `vulns: { knownCriticalOrHighVulns }` envelope is what `conformance-report`'s `security.no-critical-vulns` criterion consumes. |
|
|
78
78
|
| `html-validator-gate.mjs` | `node …/html-validator-gate.mjs [distDir]` | `$HTML_DIST`. Optional `$HTML_PAGES` (comma list, default: every `*.html`), `$HTML_THRESHOLD` (default `0`), `$HTML_REPORT`. Runs **vnu** (the Nu Html Checker, a self-contained Java jar — needs a JRE) `--errors-only` over the built pages and **fails closed** above the threshold. The report's `htmlValidator: { errors }` envelope is what `conformance-report`'s `html.validator-clean` criterion consumes. |
|
|
79
79
|
| `baseline-gate.mjs` | `node …/baseline-gate.mjs [cssGlob]` | `$BASELINE_CSS` (default `dist/**/*.css`). Optional `$BASELINE_TARGET` (`widely`/`newly`, default `widely`), `$BASELINE_REPORT`. Maps the shipped CSS to **web-features Baseline** data (via `stylelint-plugin-use-baseline` — headless, no browser) and **fails closed** when the site-wide status is below target. A feature behind an `@supports` query is a tested fallback and doesn't count against it. The report's `baseline: { status, fallbackTested }` envelope is what `conformance-report`'s `compatibility.baseline` criterion consumes. |
|
|
80
|
+
| `jargon-gate.mjs` | `node …/jargon-gate.mjs [distDir] [--strict]` | `$JARGON_DIST`. Optional `$JARGON_ALLOWLIST` (comma list of accepted terms), `$JARGON_MIN_LENGTH` (default `3`), `$JARGON_THRESHOLD` (default `0`, for `--strict`), `$JARGON_REPORT`. Flags **undefined jargon** in the prose: words not in a 275k-word English dictionary (compounds/possessives atomized first) that the page does not **define** via `<abbr title>`, `<dfn>`, or a `<dl>` glossary — for W3C COGA / WCAG 3.1.3 Unusual Words and for AI readers. WARN-only by default; `--strict` fails closed. Report carries a `plainLanguage: { undefinedJargon, glossaryPresent }` envelope (for a future `cognitive.plain-language` criterion). |
|
|
80
81
|
| `readability-gate.mjs` | `node …/readability-gate.mjs <corpus.json> [--strict]` | **The corpus is an input** the site assembles from its copy: a JSON array of `{id,text}` or an `{id:text}` map. Optional `$READABILITY_THRESHOLDS`, `$READABILITY_MIN_WORDS`, `$READABILITY_KNOWN_ACRONYMS`. WARN-only unless `--strict`. |
|
|
81
82
|
| `commonmark-runner.mjs` | `node …/commonmark-runner.mjs <renderer.mjs> [fixtures.json]` | **The site's markdown renderer module** (export `renderMarkdown`, or set `$COMMONMARK_RENDER_EXPORT`). Default fixtures pin a safe CommonMark subset + 4 hostile-HTML escapes; a site with a different renderer supplies its own `fixtures.json`. |
|
|
82
83
|
| `semantic/gate.ts` | `deno run --allow-read --allow-net …/gate.ts` | Built HTML in `$SEMANTIC_DIR` (default `dist/blog`); `$SEMANTIC_SELECTOR` (subject node, default `article`). Imports `jsr:@bounded-systems/lone`; any error-severity finding fails CI. |
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Plain-language gate — flags UNDEFINED JARGON in a site's prose. For people with
|
|
3
|
+
// cognitive/learning disabilities (W3C COGA, WCAG 3.1.3 Unusual Words / 3.1.4
|
|
4
|
+
// Abbreviations) AND for machine/AI readers, an unusual term should be DEFINED on
|
|
5
|
+
// first use. This gate extracts the prose (excluding code), finds words that are
|
|
6
|
+
// not in a large English dictionary and not on an allowlist, and reports those that
|
|
7
|
+
// the page does not DEFINE (via <abbr title>, <dfn>, or a <dl> glossary).
|
|
8
|
+
//
|
|
9
|
+
// WARN-only by default (it reports signal); `--strict` fails closed above a threshold.
|
|
10
|
+
//
|
|
11
|
+
// node gates/jargon-gate.mjs [distDir] [--strict]
|
|
12
|
+
//
|
|
13
|
+
// Config-driven; NOTHING about any one site is hard-coded:
|
|
14
|
+
// argv / $JARGON_DIST built output dir (default: "dist")
|
|
15
|
+
// $JARGON_ALLOWLIST comma list of accepted domain terms (lowercased)
|
|
16
|
+
// $JARGON_MIN_LENGTH ignore tokens shorter than this (default: 3)
|
|
17
|
+
// $JARGON_THRESHOLD max undefined-jargon terms (--strict) (default: 0)
|
|
18
|
+
// $JARGON_REPORT path to write the JSON report (default: none)
|
|
19
|
+
//
|
|
20
|
+
// The pure tokenize/detect/evaluate functions are exported for unit testing.
|
|
21
|
+
import { realpathSync } from "node:fs";
import { readFile, readdir, access, writeFile } from "node:fs/promises";
import { createRequire } from "node:module";
import { resolve, join } from "node:path";
import { pathToFileURL } from "node:url";
import { parseHTML } from "linkedom";
|
|
25
|
+
|
|
26
|
+
// The dictionary ships as JSON; createRequire loads it without an import attribute.
|
|
27
|
+
const words = createRequire(import.meta.url)("an-array-of-english-words");
|
|
28
|
+
const DICTIONARY = new Set(words);
|
|
29
|
+
export const DEFAULT_MIN_LENGTH = 3;
|
|
30
|
+
|
|
31
|
+
// Stems left behind when "n't" contractions are atomized (couldn't → couldn, t).
|
|
32
|
+
// They are not jargon; skip them so the signal isn't polluted by punctuation.
|
|
33
|
+
const CONTRACTION_STEMS = new Set([
|
|
34
|
+
"couldn", "doesn", "didn", "isn", "wasn", "aren", "weren", "haven", "hasn",
|
|
35
|
+
"hadn", "wouldn", "shouldn", "mustn", "mightn", "needn", "shan", "daren",
|
|
36
|
+
]);
|
|
37
|
+
|
|
38
|
+
// ── Pure core (unit-testable) ────────────────────────────────────────────────
|
|
39
|
+
|
|
40
|
+
/** Split prose into lowercased ATOMIC word tokens (pure letter runs). Atomizing on
|
|
41
|
+
* every non-letter means possessives and hyphenated compounds break into their
|
|
42
|
+
* parts — "build's" → build, s; "agent-authored" → agent, authored — so only a
|
|
43
|
+
* genuinely non-dictionary atom (e.g. "asvs", "frobnicator") can be flagged. */
|
|
44
|
+
export function tokenize(text) {
|
|
45
|
+
return String(text).toLowerCase().match(/[a-z]+/g) || [];
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/** Candidate jargon: tokens not in the dictionary nor the allowlist, ≥ minLength. */
|
|
49
|
+
export function candidateJargon(text, { allowlist = new Set(), minLength = DEFAULT_MIN_LENGTH } = {}) {
|
|
50
|
+
const out = new Set();
|
|
51
|
+
for (const t of tokenize(text)) {
|
|
52
|
+
if (t.length < minLength) continue;
|
|
53
|
+
if (DICTIONARY.has(t) || allowlist.has(t) || CONTRACTION_STEMS.has(t)) continue;
|
|
54
|
+
out.add(t);
|
|
55
|
+
}
|
|
56
|
+
return out;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/** Evaluate candidate jargon against the terms the page DEFINES. */
|
|
60
|
+
export function evaluateJargon({ candidates, definitions = new Set(), threshold = 0 }) {
|
|
61
|
+
const undefinedJargon = [...candidates].filter((t) => !definitions.has(t)).sort();
|
|
62
|
+
return {
|
|
63
|
+
passed: undefinedJargon.length <= threshold,
|
|
64
|
+
threshold,
|
|
65
|
+
count: undefinedJargon.length,
|
|
66
|
+
undefinedJargon,
|
|
67
|
+
// Envelope a future lone `cognitive.plain-language` criterion can consume.
|
|
68
|
+
plainLanguage: { undefinedJargon: undefinedJargon.length, glossaryPresent: definitions.size > 0 },
|
|
69
|
+
};
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
// ── Impure: pull prose + defined terms out of a built page ───────────────────
|
|
73
|
+
|
|
74
|
+
/** Extract the visible PROSE (excluding code/script/style/nav) and the set of
|
|
75
|
+
* terms the page DEFINES (<abbr title>, <dfn>, <dl><dt>), all lowercased. */
|
|
76
|
+
export function extractProseAndDefinitions(html) {
|
|
77
|
+
const { document } = parseHTML(html);
|
|
78
|
+
const definitions = new Set();
|
|
79
|
+
const add = (s) => { for (const w of tokenize(s)) definitions.add(w); };
|
|
80
|
+
for (const el of document.querySelectorAll("abbr")) { add(el.textContent || ""); add(el.getAttribute("title") || ""); }
|
|
81
|
+
for (const el of document.querySelectorAll("dfn")) add(el.textContent || "");
|
|
82
|
+
for (const el of document.querySelectorAll("dl dt")) add(el.textContent || "");
|
|
83
|
+
// Prose = body text minus code/script/style/nav. Walk text nodes and insert a
|
|
84
|
+
// boundary space at every element edge, so adjacent blocks (e.g. <dt>/<dd>) don't
|
|
85
|
+
// merge into a fake token ("frobnicator"+"the" → "frobnicatorthe").
|
|
86
|
+
for (const el of document.querySelectorAll("script,style,code,pre,nav")) el.remove();
|
|
87
|
+
const root = document.body || document.documentElement;
|
|
88
|
+
const parts = [];
|
|
89
|
+
const walk = (n) => {
|
|
90
|
+
for (const c of n.childNodes || []) {
|
|
91
|
+
if (c.nodeType === 3) parts.push(c.textContent || "");
|
|
92
|
+
else if (c.nodeType === 1) { walk(c); parts.push(" "); }
|
|
93
|
+
}
|
|
94
|
+
};
|
|
95
|
+
walk(root);
|
|
96
|
+
return { text: parts.join(" "), definitions };
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
// ── Runner ───────────────────────────────────────────────────────────────────
|
|
100
|
+
|
|
101
|
+
async function walkHtml(dir) {
|
|
102
|
+
const out = [];
|
|
103
|
+
for (const e of await readdir(dir, { withFileTypes: true })) {
|
|
104
|
+
const p = join(dir, e.name);
|
|
105
|
+
if (e.isDirectory()) out.push(...await walkHtml(p));
|
|
106
|
+
else if (e.name.endsWith(".html")) out.push(p);
|
|
107
|
+
}
|
|
108
|
+
return out;
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
/** Scan dist → aggregate candidates + definitions → evaluate. Exposed for tests. */
|
|
112
|
+
export async function runJargonGate({ dist, pages, allowlist = new Set(), minLength = DEFAULT_MIN_LENGTH, threshold = 0 }) {
|
|
113
|
+
const files = pages && pages.length ? pages.map((p) => resolve(dist, p)) : (await walkHtml(resolve(dist))).sort();
|
|
114
|
+
const candidates = new Set();
|
|
115
|
+
const definitions = new Set();
|
|
116
|
+
for (const file of files) {
|
|
117
|
+
const { text, definitions: defs } = extractProseAndDefinitions(await readFile(file, "utf8"));
|
|
118
|
+
for (const t of candidateJargon(text, { allowlist, minLength })) candidates.add(t);
|
|
119
|
+
for (const d of defs) definitions.add(d);
|
|
120
|
+
}
|
|
121
|
+
const report = evaluateJargon({ candidates, definitions, threshold });
|
|
122
|
+
report.pages = files.length;
|
|
123
|
+
return report;
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
// ── CLI ──────────────────────────────────────────────────────────────────────
|
|
127
|
+
|
|
128
|
+
async function main() {
|
|
129
|
+
const argv = process.argv.slice(2);
|
|
130
|
+
const strict = argv.includes("--strict");
|
|
131
|
+
const distArg = argv.find((a) => !a.startsWith("--"));
|
|
132
|
+
const dist = resolve(distArg || process.env.JARGON_DIST || "dist");
|
|
133
|
+
const exists = async (p) => { try { await access(p); return true; } catch { return false; } };
|
|
134
|
+
if (!(await exists(dist))) { console.error(`✗ jargon-gate: ${dist} not found — build first.`); process.exit(2); }
|
|
135
|
+
|
|
136
|
+
const allowlist = new Set((process.env.JARGON_ALLOWLIST || "").split(",").map((s) => s.trim().toLowerCase()).filter(Boolean));
|
|
137
|
+
const minLength = Number.parseInt(process.env.JARGON_MIN_LENGTH ?? String(DEFAULT_MIN_LENGTH), 10);
|
|
138
|
+
const threshold = Number.parseInt(process.env.JARGON_THRESHOLD ?? "0", 10);
|
|
139
|
+
|
|
140
|
+
const report = await runJargonGate({ dist, allowlist, minLength, threshold });
|
|
141
|
+
if (process.env.JARGON_REPORT) await writeFile(resolve(process.env.JARGON_REPORT), JSON.stringify(report, null, 2) + "\n");
|
|
142
|
+
|
|
143
|
+
const sample = report.undefinedJargon.slice(0, 30).join(", ");
|
|
144
|
+
const line = `jargon-gate: ${report.count} undefined jargon term(s) across ${report.pages} page(s)` +
|
|
145
|
+
`${report.plainLanguage.glossaryPresent ? " (glossary present)" : " (no glossary/<abbr> definitions found)"}`;
|
|
146
|
+
|
|
147
|
+
if (strict && !report.passed) {
|
|
148
|
+
console.error(`✗ ${line}`);
|
|
149
|
+
console.error(` ${sample}${report.count > 30 ? ` … (+${report.count - 30} more)` : ""}`);
|
|
150
|
+
console.error(` define unusual terms on first use (<abbr title>, <dfn>, or a glossary), or allowlist accepted domain terms via $JARGON_ALLOWLIST.`);
|
|
151
|
+
process.exit(1);
|
|
152
|
+
}
|
|
153
|
+
console.log(`✓ ${line}`);
|
|
154
|
+
if (report.count) console.log(` ${sample}${report.count > 30 ? ` … (+${report.count - 30} more)` : ""} (WARN-only; pass --strict to block)`);
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
if (import.meta.url === `file://${process.argv[1]}`) {
|
|
158
|
+
main().catch((e) => { console.error("✗ jargon-gate: error —", e.stack || e.message); process.exit(1); });
|
|
159
|
+
}
|