@mcptoolshop/a11y-evidence-engine 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,52 @@
1
+ "use strict";
2
+
3
/**
 * Serialize a JSON-compatible value into its canonical string form
 * (prov-spec requirement; a JCS subset per RFC 8785).
 *
 * Canonical form means:
 * - object members are emitted in sorted key order (default `sort()`),
 * - no whitespace is emitted between tokens,
 * - strings and numbers are rendered by `JSON.stringify`.
 *
 * @param {any} value - JSON-compatible value
 * @returns {string} Canonical JSON string
 * @throws {Error} If a number is NaN/Infinity, or the value (or any nested
 *   value) is not a JSON type (undefined, function, symbol, bigint).
 */
function canonicalize(value) {
  // typeof null is "object", so null has to be peeled off first.
  if (value === null) {
    return "null";
  }

  switch (typeof value) {
    case "boolean":
      return value ? "true" : "false";

    case "string":
      return JSON.stringify(value);

    case "number":
      if (!Number.isFinite(value)) {
        throw new Error("Non-finite numbers not allowed in canonical JSON");
      }
      return JSON.stringify(value);

    case "object": {
      if (Array.isArray(value)) {
        const elements = value.map((element) => canonicalize(element));
        return `[${elements.join(",")}]`;
      }

      const members = Object.keys(value)
        .sort()
        .map((name) => `${JSON.stringify(name)}:${canonicalize(value[name])}`);
      return `{${members.join(",")}}`;
    }

    default:
      throw new Error(`Non-JSON value type: ${typeof value}`);
  }
}
51
+
52
+ module.exports = { canonicalize };
@@ -0,0 +1,34 @@
1
+ "use strict";
2
+
3
+ /**
4
+ * JSON Pointer utilities per RFC 6901.
5
+ */
6
+
7
/**
 * Produce the JSON pointer that addresses one entry of the `nodes` array.
 * @param {number} index - Node index
 * @returns {string} JSON pointer of the form "/nodes/<index>"
 */
function nodePointer(index) {
  return "/nodes/".concat(index);
}
15
+
16
/**
 * Encode a raw string so it can be used as a single JSON pointer segment.
 * "~" becomes "~0" and "/" becomes "~1".
 * @param {string} str - Raw string
 * @returns {string} Escaped string
 */
function escapePointer(str) {
  // Tildes must be handled first; otherwise the "~" introduced when
  // escaping "/" would itself be escaped again.
  const tildesEscaped = str.split("~").join("~0");
  return tildesEscaped.split("/").join("~1");
}
24
+
25
/**
 * Decode a JSON pointer segment back to its raw string.
 * "~1" becomes "/" and "~0" becomes "~".
 * @param {string} str - Escaped string
 * @returns {string} Raw string
 */
function unescapePointer(str) {
  // A single left-to-right pass gives the same result as replacing "~1"
  // and then "~0": neither sequence can overlap the other, and decoding
  // "~1" to "/" can never create a new "~0".
  return str.replace(/~[01]/g, (token) => (token === "~1" ? "/" : "~"));
}
33
+
34
+ module.exports = { nodePointer, escapePointer, unescapePointer };
@@ -0,0 +1,153 @@
1
+ "use strict";
2
+
3
+ const crypto = require("crypto");
4
+ const { canonicalize } = require("./canonicalize.js");
5
+
6
/**
 * Build the three prov-spec provenance documents for one finding.
 *
 * Returned documents:
 *   1. record   - method engine.extract.evidence.json_pointer
 *   2. digest   - method integrity.digest.sha256
 *   3. envelope - method adapter.wrap.envelope_v0_1
 *
 * The SHA-256 digest is computed over the canonical JSON of the evidence
 * content ({ document_ref, pointer, evidence }). The same evidence content
 * object is referenced by both `record` and `digest`.
 *
 * @param {Object} finding - Finding with evidence; reads finding_id, rule_id,
 *   severity, message, location ({ file, json_pointer }) and evidence
 * @param {Object} options - { engineVersion, timestamp }; a falsy
 *   engineVersion falls back to "0.1.0", a falsy timestamp falls back to the
 *   current time as an ISO string
 * @returns {{ record: Object, digest: Object, envelope: Object }}
 */
function emitProvenance(finding, options = {}) {
  const engineVersion = options.engineVersion || "0.1.0";
  const timestamp = options.timestamp || new Date().toISOString();

  // Small builders so the three records share one shape. Each call returns
  // a fresh object, so the records do not share agent or wrapper instances.
  const wrapArtifact = (fields) => ({ "artifact.v0.1": fields });
  const provRecord = (methodId, inputs, outputs) => ({
    "prov.record.v0.1": {
      method_id: methodId,
      timestamp,
      inputs,
      outputs,
      agent: {
        name: "a11y-evidence-engine",
        version: engineVersion,
      },
    },
  });

  // 1. Evidence extraction record
  const sourceUri = `file://${finding.location.file}`;
  const evidenceContent = {
    document_ref: finding.location.file,
    pointer: finding.location.json_pointer,
    evidence: finding.evidence,
  };
  const record = provRecord(
    "engine.extract.evidence.json_pointer",
    [wrapArtifact({ name: "source_document", uri: sourceUri })],
    [wrapArtifact({ name: "evidence", content: evidenceContent })]
  );

  // 2. Integrity digest of the canonical evidence
  const hasher = crypto.createHash("sha256");
  hasher.update(canonicalize(evidenceContent), "utf8");
  const evidenceDigest = hasher.digest("hex");
  const sha256Ref = () => ({ algorithm: "sha256", value: evidenceDigest });

  const digest = provRecord(
    "integrity.digest.sha256",
    [wrapArtifact({ name: "evidence", content: evidenceContent })],
    [wrapArtifact({ name: "digest", digest: sha256Ref() })]
  );

  // 3. Envelope: the finding summary plus a record that refers to the
  //    evidence by digest only.
  const envelope = {
    "mcp.envelope.v0.1": {
      result: {
        finding_id: finding.finding_id,
        rule_id: finding.rule_id,
        severity: finding.severity,
        message: finding.message,
        location: finding.location,
      },
      provenance: provRecord(
        "adapter.wrap.envelope_v0_1",
        [wrapArtifact({ name: "evidence_record", digest: sha256Ref() })],
        [wrapArtifact({ name: "envelope", content_type: "mcp.envelope.v0.1" })]
      ),
    },
  };

  return { record, digest, envelope };
}
136
+
137
/**
 * Check whether an evidence object hashes to the expected digest.
 *
 * The evidence is canonicalized, hashed with SHA-256, and the lowercase hex
 * result is compared to `expectedDigest` with strict equality.
 *
 * @param {Object} evidence - Evidence object
 * @param {string} expectedDigest - Expected SHA-256 hex digest
 * @returns {boolean} true when the computed digest equals expectedDigest
 */
function verifyDigest(evidence, expectedDigest) {
  const canonicalText = canonicalize(evidence);
  const hasher = crypto.createHash("sha256");
  hasher.update(canonicalText, "utf8");
  return hasher.digest("hex") === expectedDigest;
}
152
+
153
+ module.exports = { emitProvenance, verifyDigest, canonicalize };
package/src/fswalk.js ADDED
@@ -0,0 +1,56 @@
1
+ "use strict";
2
+
3
+ const fs = require("fs");
4
+ const path = require("path");
5
+
6
/**
 * Gather all HTML files (.html / .htm, matched case-insensitively) from a path.
 * If path is a file, returns [path].
 * If path is a directory, recursively finds all HTML files beneath it.
 *
 * @param {string} targetPath - File or directory path
 * @returns {string[]} Array of absolute file paths, sorted for determinism
 * @throws {Error} If the path does not exist, is a file without an HTML
 *   extension, or is neither a regular file nor a directory
 */
function walkHtmlFiles(targetPath) {
  const resolved = path.resolve(targetPath);

  if (!fs.existsSync(resolved)) {
    throw new Error(`Path does not exist: ${resolved}`);
  }

  const stat = fs.statSync(resolved);

  if (stat.isFile()) {
    if (hasHtmlExtension(resolved)) {
      return [resolved];
    }
    throw new Error(`Not an HTML file: ${resolved}`);
  }

  if (stat.isDirectory()) {
    const files = [];
    walkDir(resolved, files);
    // Default sort() gives a stable, locale-independent ordering.
    return files.sort();
  }

  throw new Error(`Invalid path type: ${resolved}`);
}

/**
 * Decide whether a file name or path ends with an HTML extension.
 * The comparison ignores case so that names such as "INDEX.HTML" or
 * "page.Htm" are recognized as well.
 *
 * @param {string} fileName - File name or path
 * @returns {boolean}
 */
function hasHtmlExtension(fileName) {
  const lowered = fileName.toLowerCase();
  return lowered.endsWith(".html") || lowered.endsWith(".htm");
}

/**
 * Recursively collect HTML files below `dir` into `files`.
 * Only entries reported as directories or regular files are considered;
 * any other entry kind is ignored.
 *
 * @param {string} dir - Directory to scan
 * @param {string[]} files - Accumulator that matching paths are pushed onto
 */
function walkDir(dir, files) {
  const entries = fs.readdirSync(dir, { withFileTypes: true });

  for (const entry of entries) {
    const fullPath = path.join(dir, entry.name);

    if (entry.isDirectory()) {
      walkDir(fullPath, files);
    } else if (entry.isFile() && hasHtmlExtension(entry.name)) {
      files.push(fullPath);
    }
  }
}
55
+
56
+ module.exports = { walkHtmlFiles };
@@ -0,0 +1,117 @@
1
+ "use strict";
2
+
3
+ const { Parser } = require("htmlparser2");
4
+
5
/**
 * Parse HTML into a flat `nodes` array in document order.
 *
 * Every element and every non-blank text run gets a node whose `index` is
 * its position in `nodes`, so it can be addressed by a JSON pointer.
 * Element nodes carry { type, tagName, attrs, children, parent, index };
 * text nodes carry { type, content, trimmed, parent, index }.
 *
 * Text that is whitespace-only, or that appears while no element is open,
 * is not recorded. `root` is the most recently opened element that had no
 * parent (null if there was none).
 *
 * @param {string} html - Raw HTML content
 * @returns {{ nodes: Array, root: Object|null }}
 */
function parseHtml(html) {
  const nodes = [];
  const openElements = [];
  let root = null;

  // Innermost element that is currently open, or null at top level.
  const currentParent = () =>
    openElements.length > 0 ? openElements[openElements.length - 1] : null;

  const handlers = {
    onopentag(name, attrs) {
      const parent = currentParent();
      const element = {
        type: "element",
        tagName: name.toLowerCase(),
        attrs: { ...attrs },
        children: [],
        parent,
        index: nodes.length,
      };

      nodes.push(element);

      if (parent) {
        parent.children.push(element);
      } else {
        root = element;
      }

      openElements.push(element);
    },

    ontext(text) {
      // Only non-whitespace text matters for accessibility analysis.
      const trimmed = text.trim();
      const parent = currentParent();
      if (!trimmed || !parent) {
        return;
      }

      const textNode = {
        type: "text",
        content: text,
        trimmed,
        parent,
        index: nodes.length,
      };
      nodes.push(textNode);
      parent.children.push(textNode);
    },

    onclosetag() {
      openElements.pop();
    },
  };

  const parserOptions = {
    lowerCaseTags: true,
    lowerCaseAttributeNames: true,
    decodeEntities: true,
  };

  const parser = new Parser(handlers, parserOptions);
  parser.write(html);
  parser.end();

  return { nodes, root };
}
72
+
73
/**
 * Collect the text beneath a node: the `content` of text children and the
 * (recursively computed) text of element children, concatenated in order.
 * The result is trimmed at each level.
 * @param {Object} node - Element node
 * @returns {string} Concatenated text, or "" when node has no children
 */
function getTextContent(node) {
  if (!(node && node.children)) {
    return "";
  }

  let collected = "";
  for (const child of node.children) {
    switch (child.type) {
      case "text":
        collected += child.content;
        break;
      case "element":
        collected += getTextContent(child);
        break;
      default:
        // Other node kinds contribute no text.
        break;
    }
  }

  return collected.trim();
}
91
+
92
/**
 * Select the element nodes for which `predicate` returns a truthy value.
 * Non-element nodes are never passed to the predicate.
 * @param {Array} nodes - Flat nodes array
 * @param {Function} predicate - (node) => boolean
 * @returns {Array} Matching element nodes, in their original order
 */
function findElements(nodes, predicate) {
  return nodes.filter((candidate) => {
    if (candidate.type !== "element") {
      return false;
    }
    return predicate(candidate);
  });
}
101
+
102
/**
 * Look up the first element node whose `id` attribute equals `id`.
 * @param {Array} nodes - Flat nodes array
 * @param {string} id - Element ID
 * @returns {Object|null} The matching element, or null when none matches
 */
function getElementById(nodes, id) {
  for (const candidate of nodes) {
    if (candidate.type === "element" && candidate.attrs.id === id) {
      return candidate;
    }
  }
  return null;
}
111
+
112
+ module.exports = {
113
+ parseHtml,
114
+ getTextContent,
115
+ findElements,
116
+ getElementById,
117
+ };
package/src/ids.js ADDED
@@ -0,0 +1,53 @@
1
+ "use strict";
2
+
3
/**
 * Generate deterministic finding IDs.
 * Findings are sorted by (file, rule_id, pointer index) and then numbered
 * sequentially starting at 1. The input array is not modified.
 *
 * Strings are ordered by plain code-unit comparison rather than
 * `localeCompare`: locale-aware collation depends on the runtime's locale
 * and ICU data, so it could assign different IDs on different machines.
 *
 * @param {Array} findings - Raw findings array; each finding needs
 *   `rule_id` and `location: { file, json_pointer }`
 * @returns {Array} New array of finding copies with `finding_id` assigned
 */
function assignFindingIds(findings) {
  // Locale-independent three-way string comparison.
  const compareCodeUnits = (left, right) => {
    if (left < right) return -1;
    if (left > right) return 1;
    return 0;
  };

  const sorted = [...findings].sort((a, b) => {
    // First by file
    const fileCompare = compareCodeUnits(a.location.file, b.location.file);
    if (fileCompare !== 0) return fileCompare;

    // Then by rule_id
    const ruleCompare = compareCodeUnits(a.rule_id, b.rule_id);
    if (ruleCompare !== 0) return ruleCompare;

    // Then by pointer, compared numerically so /nodes/2 sorts before /nodes/10
    return (
      extractPointerIndex(a.location.json_pointer) -
      extractPointerIndex(b.location.json_pointer)
    );
  });

  // Assign sequential IDs
  return sorted.map((finding, index) => ({
    ...finding,
    finding_id: formatFindingId(index + 1),
  }));
}
33
+
34
/**
 * Pull the numeric node index out of a JSON pointer.
 * The first "/nodes/<digits>" occurrence anywhere in the pointer is used.
 * @param {string} pointer - e.g., "/nodes/12"
 * @returns {number} The parsed index, or 0 when the pointer has no such part
 */
function extractPointerIndex(pointer) {
  const hit = pointer.match(/\/nodes\/(\d+)/);
  if (!hit) {
    return 0;
  }
  const [, digits] = hit;
  return Number.parseInt(digits, 10);
}
43
+
44
/**
 * Render a finding number as an ID, left-padded with zeros to at least
 * four digits.
 * @param {number} num - Finding number (1-based)
 * @returns {string} e.g., "finding-0001"
 */
function formatFindingId(num) {
  const padded = String(num).padStart(4, "0");
  return "finding-" + padded;
}
52
+
53
+ module.exports = { assignFindingIds, formatFindingId };
@@ -0,0 +1,50 @@
1
+ "use strict";
2
+
3
+ const { findElements } = require("../html_parse.js");
4
+
5
const RULE_ID = "html.document.missing_lang";

/**
 * Report every <html> element whose lang attribute is absent, empty, or
 * whitespace-only.
 * WCAG 3.1.1 - Language of Page (Level A)
 *
 * @param {Array} nodes - Flat nodes array
 * @param {Object} context - Supplies `relativePath`, used as the finding's file
 * @returns {Array} Findings (without finding_id), one per offending element
 */
function run(nodes, context) {
  const lacksLang = (element) => {
    const { lang } = element.attrs;
    return !lang || lang.trim() === "";
  };

  return findElements(nodes, (n) => n.tagName === "html")
    .filter(lacksLang)
    .map((element) => ({
      rule_id: RULE_ID,
      severity: "error",
      confidence: 1.0,
      message: "Document is missing a lang attribute on the <html> element.",
      location: {
        file: context.relativePath,
        json_pointer: `/nodes/${element.index}`,
      },
      evidence: {
        tagName: element.tagName,
        attrs: filterAttrs(element.attrs, ["lang"]),
      },
    }));
}
39
+
40
/**
 * Copy the listed attributes that are present on `attrs` into a new object.
 * Keys that are not present are left out entirely.
 * @param {Object} attrs - Attribute map of an element
 * @param {string[]} keys - Attribute names to keep
 * @returns {Object} New object holding only the requested attributes
 */
function filterAttrs(attrs, keys) {
  return keys.reduce((picked, name) => {
    if (name in attrs) {
      picked[name] = attrs[name];
    }
    return picked;
  }, {});
}
49
+
50
+ module.exports = { id: RULE_ID, run };
@@ -0,0 +1,105 @@
1
+ "use strict";
2
+
3
+ const { findElements, getElementById } = require("../html_parse.js");
4
+
5
const RULE_ID = "html.form_control.missing_label";

const FORM_CONTROLS = ["input", "select", "textarea"];
const EXEMPT_INPUT_TYPES = ["hidden", "submit", "reset", "button", "image"];

/**
 * Report form controls (<input>, <select>, <textarea>) that have no
 * accessible label.
 * WCAG 1.3.1 - Info and Relationships (Level A)
 * WCAG 4.1.2 - Name, Role, Value (Level A)
 *
 * <input> elements whose type is hidden, submit, reset, button or image are
 * skipped; a missing type is treated as "text". Whether a control counts as
 * labeled is decided by hasAccessibleLabel, which receives the set of all
 * non-empty <label for="..."> targets found in the document.
 *
 * @param {Array} nodes - Flat nodes array
 * @param {Object} context - Supplies `relativePath`, used as the finding's file
 * @returns {Array} Findings (without finding_id), one per unlabeled control
 */
function run(nodes, context) {
  // Collect every id referenced by a <label for="..."> up front.
  const labelForIds = new Set();
  for (const label of findElements(nodes, (n) => n.tagName === "label")) {
    if (label.attrs.for) {
      labelForIds.add(label.attrs.for);
    }
  }

  const isExemptInput = (control) => {
    if (control.tagName !== "input") {
      return false;
    }
    const type = (control.attrs.type || "text").toLowerCase();
    return EXEMPT_INPUT_TYPES.includes(type);
  };

  const findings = [];
  const controls = findElements(nodes, (n) => FORM_CONTROLS.includes(n.tagName));

  controls.forEach((control) => {
    if (isExemptInput(control)) {
      return;
    }
    if (hasAccessibleLabel(control, labelForIds, nodes)) {
      return;
    }

    findings.push({
      rule_id: RULE_ID,
      severity: "error",
      confidence: 0.95,
      message: `Form control <${control.tagName}> is missing an associated label.`,
      location: {
        file: context.relativePath,
        json_pointer: `/nodes/${control.index}`,
      },
      evidence: {
        tagName: control.tagName,
        attrs: filterAttrs(control.attrs, [
          "id",
          "name",
          "type",
          "aria-label",
          "aria-labelledby",
        ]),
      },
    });
  });

  return findings;
}
67
+
68
/**
 * Check if a form control has an accessible label.
 *
 * A control counts as labeled when any of the following holds:
 *   - it has a non-blank aria-label attribute;
 *   - its aria-labelledby lists at least one id of an existing element;
 *   - its id is the target of some <label for="...">;
 *   - it is nested inside a <label> that has no `for` attribute
 *     (implicit label association).
 *
 * @param {Object} node - Form control element node
 * @param {Set<string>} labelForIds - Ids referenced by <label for="...">
 * @param {Array} nodes - Flat nodes array, used to resolve aria-labelledby
 * @returns {boolean}
 */
function hasAccessibleLabel(node, labelForIds, nodes) {
  // Check aria-label
  const ariaLabel = node.attrs["aria-label"];
  if (ariaLabel && ariaLabel.trim()) {
    return true;
  }

  // Check aria-labelledby (must point to existing element)
  const labelledBy = node.attrs["aria-labelledby"];
  if (labelledBy) {
    // Leading/trailing whitespace makes split() yield empty tokens; those
    // are not ids and must not be looked up.
    const ids = labelledBy.split(/\s+/).filter((id) => id !== "");
    if (ids.some((id) => getElementById(nodes, id))) {
      return true;
    }
  }

  // Check for associated label via 'for' attribute
  if (node.attrs.id && labelForIds.has(node.attrs.id)) {
    return true;
  }

  // Check for an implicit label: <label>Name <input></label>.
  // A wrapping <label> that carries a `for` attribute labels the element
  // it points to instead, so it is not counted here.
  // NOTE(review): this accepts every control nested in such a label; the
  // HTML spec only associates the first labelable descendant - confirm
  // whether that stricter rule is wanted.
  for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
    if (
      ancestor.type === "element" &&
      ancestor.tagName === "label" &&
      ancestor.attrs.for === undefined
    ) {
      return true;
    }
  }

  return false;
}
94
+
95
/**
 * Build a new object containing only the requested attributes that exist
 * on `attrs`; absent keys are omitted.
 * @param {Object} attrs - Attribute map of an element
 * @param {string[]} keys - Attribute names to keep
 * @returns {Object} New object holding only the requested attributes
 */
function filterAttrs(attrs, keys) {
  const subset = {};
  keys.forEach((name) => {
    if (!(name in attrs)) {
      return;
    }
    subset[name] = attrs[name];
  });
  return subset;
}
104
+
105
+ module.exports = { id: RULE_ID, run };
@@ -0,0 +1,77 @@
1
+ "use strict";
2
+
3
+ const { findElements } = require("../html_parse.js");
4
+
5
const RULE_ID = "html.img.missing_alt";

/**
 * Report <img> elements that have no alt attribute at all.
 * WCAG 1.1.1 - Non-text Content (Level A)
 *
 * Images for which isDecorativeImage returns true are skipped. An alt
 * attribute that is present but empty is not reported.
 *
 * @param {Array} nodes - Flat nodes array
 * @param {Object} context - Supplies `relativePath`, used as the finding's file
 * @returns {Array} Findings (without finding_id), one per offending image
 */
function run(nodes, context) {
  const images = findElements(nodes, (n) => n.tagName === "img");

  return images
    .filter((img) => !isDecorativeImage(img) && img.attrs.alt === undefined)
    .map((img) => ({
      rule_id: RULE_ID,
      severity: "error",
      confidence: 0.98,
      message: "Image element is missing alt text.",
      location: {
        file: context.relativePath,
        json_pointer: `/nodes/${img.index}`,
      },
      evidence: {
        tagName: img.tagName,
        attrs: filterAttrs(img.attrs, ["src", "alt", "role", "aria-hidden"]),
      },
    }));
}
49
+
50
/**
 * Tell whether an image is explicitly marked as decorative: its role is
 * exactly "presentation" or "none", or its aria-hidden is exactly "true".
 * @param {Object} node - <img> element node
 * @returns {boolean}
 */
function isDecorativeImage(node) {
  const { role } = node.attrs;
  const hasDecorativeRole = role === "presentation" || role === "none";
  return hasDecorativeRole || node.attrs["aria-hidden"] === "true";
}
66
+
67
/**
 * Pick the requested attributes out of `attrs`, skipping any that are not
 * present, and return them in a new object.
 * @param {Object} attrs - Attribute map of an element
 * @param {string[]} keys - Attribute names to keep
 * @returns {Object} New object holding only the requested attributes
 */
function filterAttrs(attrs, keys) {
  const kept = {};
  for (let i = 0; i < keys.length; i += 1) {
    const name = keys[i];
    if (name in attrs) {
      kept[name] = attrs[name];
    }
  }
  return kept;
}
76
+
77
+ module.exports = { id: RULE_ID, run };