npm - @mcptoolshop/a11y-evidence-engine - Versions diffs - 0.2.0 - Mend

@mcptoolshop/a11y-evidence-engine 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/.github/workflows/ci.yml +53 -0
package/CODE_OF_CONDUCT.md +129 -0
package/CONTRIBUTING.md +128 -0
package/LICENSE +21 -0
package/README.md +71 -0
package/bin/a11y-engine.js +11 -0
package/fixtures/bad/button-no-name.html +30 -0
package/fixtures/bad/img-missing-alt.html +19 -0
package/fixtures/bad/input-missing-label.html +26 -0
package/fixtures/bad/missing-lang.html +11 -0
package/fixtures/good/index.html +29 -0
package/package.json +44 -0
package/src/cli.js +74 -0
package/src/evidence/canonicalize.js +52 -0
package/src/evidence/json_pointer.js +34 -0
package/src/evidence/prov_emit.js +153 -0
package/src/fswalk.js +56 -0
package/src/html_parse.js +117 -0
package/src/ids.js +53 -0
package/src/rules/document_missing_lang.js +50 -0
package/src/rules/form_control_missing_label.js +105 -0
package/src/rules/img_missing_alt.js +77 -0
package/src/rules/index.js +37 -0
package/src/rules/interactive_missing_name.js +129 -0
package/src/scan.js +128 -0
package/test/scan.test.js +149 -0
package/test/vectors.test.js +200 -0

package/src/evidence/canonicalize.js ADDED Viewed

@@ -0,0 +1,52 @@
+"use strict";
+/**
+ * Canonicalize a JSON value per prov-spec requirements.
+ * - Sorted keys
+ * - No whitespace
+ * - UTF-8 (handled by Node.js strings)
+ *
+ * This is a JCS-subset per RFC 8785.
+ *
+ * @param {any} value - JSON-compatible value
+ * @returns {string} Canonical JSON string
+ */
+function canonicalize(value) {
+  if (value === null) {
+    return "null";
+  }
+  const type = typeof value;
+  if (type === "boolean") {
+    return value ? "true" : "false";
+  }
+  if (type === "string") {
+    return JSON.stringify(value);
+  }
+  if (type === "number") {
+    if (!Number.isFinite(value)) {
+      throw new Error("Non-finite numbers not allowed in canonical JSON");
+    }
+    return JSON.stringify(value);
+  }
+  if (Array.isArray(value)) {
+    const items = value.map((item) => canonicalize(item));
+    return "[" + items.join(",") + "]";
+  }
+  if (type === "object") {
+    const keys = Object.keys(value).sort();
+    const pairs = keys.map(
+      (key) => JSON.stringify(key) + ":" + canonicalize(value[key])
+    );
+    return "{" + pairs.join(",") + "}";
+  }
+  throw new Error(`Non-JSON value type: ${type}`);
+}
+module.exports = { canonicalize };

package/src/evidence/json_pointer.js ADDED Viewed

@@ -0,0 +1,34 @@
+"use strict";
+/**
+ * JSON Pointer utilities per RFC 6901.
+ */
+/**
+ * Build a JSON pointer from nodes index.
+ * @param {number} index - Node index
+ * @returns {string} JSON pointer
+ */
+function nodePointer(index) {
+  return `/nodes/${index}`;
+}
+/**
+ * Escape a string for use in a JSON pointer segment.
+ * @param {string} str - Raw string
+ * @returns {string} Escaped string
+ */
+function escapePointer(str) {
+  return str.replace(/~/g, "~0").replace(/\//g, "~1");
+}
+/**
+ * Unescape a JSON pointer segment.
+ * @param {string} str - Escaped string
+ * @returns {string} Raw string
+ */
+function unescapePointer(str) {
+  return str.replace(/~1/g, "/").replace(/~0/g, "~");
+}
+module.exports = { nodePointer, escapePointer, unescapePointer };

package/src/evidence/prov_emit.js ADDED Viewed

@@ -0,0 +1,153 @@
+"use strict";
+const crypto = require("crypto");
+const { canonicalize } = require("./canonicalize.js");
+/**
+ * Generate prov-spec provenance records for a finding.
+ *
+ * Emits three records:
+ * 1. record.json - engine.extract.evidence.json_pointer
+ * 2. digest.json - integrity.digest.sha256
+ * 3. envelope.json - adapter.wrap.envelope_v0_1
+ *
+ * @param {Object} finding - Finding with evidence
+ * @param {Object} options - { engineVersion }
+ * @returns {{ record: Object, digest: Object, envelope: Object }}
+ */
+function emitProvenance(finding, options = {}) {
+  const engineVersion = options.engineVersion || "0.1.0";
+  const timestamp = options.timestamp || new Date().toISOString();
+  // 1. Evidence extraction record
+  const record = {
+    "prov.record.v0.1": {
+      method_id: "engine.extract.evidence.json_pointer",
+      timestamp,
+      inputs: [
+        {
+          "artifact.v0.1": {
+            name: "source_document",
+            uri: `file://${finding.location.file}`,
+          },
+        },
+      ],
+      outputs: [
+        {
+          "artifact.v0.1": {
+            name: "evidence",
+            content: {
+              document_ref: finding.location.file,
+              pointer: finding.location.json_pointer,
+              evidence: finding.evidence,
+            },
+          },
+        },
+      ],
+      agent: {
+        name: "a11y-evidence-engine",
+        version: engineVersion,
+      },
+    },
+  };
+  // 2. Integrity digest of canonical evidence
+  const evidenceContent = record["prov.record.v0.1"].outputs[0]["artifact.v0.1"].content;
+  const canonicalEvidence = canonicalize(evidenceContent);
+  const evidenceDigest = crypto
+    .createHash("sha256")
+    .update(canonicalEvidence, "utf8")
+    .digest("hex");
+  const digest = {
+    "prov.record.v0.1": {
+      method_id: "integrity.digest.sha256",
+      timestamp,
+      inputs: [
+        {
+          "artifact.v0.1": {
+            name: "evidence",
+            content: evidenceContent,
+          },
+        },
+      ],
+      outputs: [
+        {
+          "artifact.v0.1": {
+            name: "digest",
+            digest: {
+              algorithm: "sha256",
+              value: evidenceDigest,
+            },
+          },
+        },
+      ],
+      agent: {
+        name: "a11y-evidence-engine",
+        version: engineVersion,
+      },
+    },
+  };
+  // 3. Envelope wrapping the evidence record
+  const envelope = {
+    "mcp.envelope.v0.1": {
+      result: {
+        finding_id: finding.finding_id,
+        rule_id: finding.rule_id,
+        severity: finding.severity,
+        message: finding.message,
+        location: finding.location,
+      },
+      provenance: {
+        "prov.record.v0.1": {
+          method_id: "adapter.wrap.envelope_v0_1",
+          timestamp,
+          inputs: [
+            {
+              "artifact.v0.1": {
+                name: "evidence_record",
+                digest: {
+                  algorithm: "sha256",
+                  value: evidenceDigest,
+                },
+              },
+            },
+          ],
+          outputs: [
+            {
+              "artifact.v0.1": {
+                name: "envelope",
+                content_type: "mcp.envelope.v0.1",
+              },
+            },
+          ],
+          agent: {
+            name: "a11y-evidence-engine",
+            version: engineVersion,
+          },
+        },
+      },
+    },
+  };
+  return { record, digest, envelope };
+}
+/**
+ * Verify a digest matches the canonical evidence.
+ *
+ * @param {Object} evidence - Evidence object
+ * @param {string} expectedDigest - Expected SHA-256 hex digest
+ * @returns {boolean}
+ */
+function verifyDigest(evidence, expectedDigest) {
+  const canonical = canonicalize(evidence);
+  const actualDigest = crypto
+    .createHash("sha256")
+    .update(canonical, "utf8")
+    .digest("hex");
+  return actualDigest === expectedDigest;
+}
+module.exports = { emitProvenance, verifyDigest, canonicalize };

package/src/fswalk.js ADDED Viewed

@@ -0,0 +1,56 @@
+"use strict";
+const fs = require("fs");
+const path = require("path");
+/**
+ * Gather all .html files from a path.
+ * If path is a file, returns [path].
+ * If path is a directory, recursively finds all .html files.
+ *
+ * @param {string} targetPath - File or directory path
+ * @returns {string[]} Array of absolute file paths, sorted for determinism
+ */
+function walkHtmlFiles(targetPath) {
+  const resolved = path.resolve(targetPath);
+  if (!fs.existsSync(resolved)) {
+    throw new Error(`Path does not exist: ${resolved}`);
+  }
+  const stat = fs.statSync(resolved);
+  if (stat.isFile()) {
+    if (resolved.endsWith(".html") || resolved.endsWith(".htm")) {
+      return [resolved];
+    }
+    throw new Error(`Not an HTML file: ${resolved}`);
+  }
+  if (stat.isDirectory()) {
+    const files = [];
+    walkDir(resolved, files);
+    // Sort for deterministic ordering
+    return files.sort();
+  }
+  throw new Error(`Invalid path type: ${resolved}`);
+}
+function walkDir(dir, files) {
+  const entries = fs.readdirSync(dir, { withFileTypes: true });
+  for (const entry of entries) {
+    const fullPath = path.join(dir, entry.name);
+    if (entry.isDirectory()) {
+      walkDir(fullPath, files);
+    } else if (entry.isFile()) {
+      if (entry.name.endsWith(".html") || entry.name.endsWith(".htm")) {
+        files.push(fullPath);
+      }
+    }
+  }
+}
+module.exports = { walkHtmlFiles };

package/src/html_parse.js ADDED Viewed

@@ -0,0 +1,117 @@
+"use strict";
+const { Parser } = require("htmlparser2");
+/**
+ * Parse HTML and build a flat nodes[] array in document order.
+ * Each node has a stable index for JSON pointer references.
+ *
+ * @param {string} html - Raw HTML content
+ * @returns {{ nodes: Array, root: Object|null }}
+ */
+function parseHtml(html) {
+  const nodes = [];
+  const stack = [];
+  let root = null;
+  const parser = new Parser(
+    {
+      onopentag(name, attrs) {
+        const node = {
+          type: "element",
+          tagName: name.toLowerCase(),
+          attrs: { ...attrs },
+          children: [],
+          parent: stack.length > 0 ? stack[stack.length - 1] : null,
+          index: nodes.length,
+        };
+        nodes.push(node);
+        if (node.parent) {
+          node.parent.children.push(node);
+        } else {
+          root = node;
+        }
+        stack.push(node);
+      },
+      ontext(text) {
+        // Only capture non-whitespace text for accessibility analysis
+        const trimmed = text.trim();
+        if (trimmed && stack.length > 0) {
+          const textNode = {
+            type: "text",
+            content: text,
+            trimmed: trimmed,
+            parent: stack[stack.length - 1],
+            index: nodes.length,
+          };
+          nodes.push(textNode);
+          stack[stack.length - 1].children.push(textNode);
+        }
+      },
+      onclosetag() {
+        stack.pop();
+      },
+    },
+    {
+      lowerCaseTags: true,
+      lowerCaseAttributeNames: true,
+      decodeEntities: true,
+    }
+  );
+  parser.write(html);
+  parser.end();
+  return { nodes, root };
+}
+/**
+ * Get the text content of an element (concatenated child text nodes).
+ * @param {Object} node - Element node
+ * @returns {string}
+ */
+function getTextContent(node) {
+  if (!node || !node.children) return "";
+  let text = "";
+  for (const child of node.children) {
+    if (child.type === "text") {
+      text += child.content;
+    } else if (child.type === "element") {
+      text += getTextContent(child);
+    }
+  }
+  return text.trim();
+}
+/**
+ * Find all elements matching a predicate.
+ * @param {Array} nodes - Flat nodes array
+ * @param {Function} predicate - (node) => boolean
+ * @returns {Array}
+ */
+function findElements(nodes, predicate) {
+  return nodes.filter((n) => n.type === "element" && predicate(n));
+}
+/**
+ * Find element by ID.
+ * @param {Array} nodes - Flat nodes array
+ * @param {string} id - Element ID
+ * @returns {Object|null}
+ */
+function getElementById(nodes, id) {
+  return nodes.find((n) => n.type === "element" && n.attrs.id === id) || null;
+}
+module.exports = {
+  parseHtml,
+  getTextContent,
+  findElements,
+  getElementById,
+};

package/src/ids.js ADDED Viewed

@@ -0,0 +1,53 @@
+"use strict";
+/**
+ * Generate deterministic finding IDs.
+ * Findings are sorted by (file, rule_id, pointer) then numbered.
+ *
+ * @param {Array} findings - Raw findings array
+ * @returns {Array} Findings with assigned finding_id
+ */
+function assignFindingIds(findings) {
+  // Sort for deterministic ordering
+  const sorted = [...findings].sort((a, b) => {
+    // First by file
+    const fileCompare = a.location.file.localeCompare(b.location.file);
+    if (fileCompare !== 0) return fileCompare;
+    // Then by rule_id
+    const ruleCompare = a.rule_id.localeCompare(b.rule_id);
+    if (ruleCompare !== 0) return ruleCompare;
+    // Then by pointer (numeric extraction for proper sorting)
+    const aIndex = extractPointerIndex(a.location.json_pointer);
+    const bIndex = extractPointerIndex(b.location.json_pointer);
+    return aIndex - bIndex;
+  });
+  // Assign sequential IDs
+  return sorted.map((finding, index) => ({
+    ...finding,
+    finding_id: formatFindingId(index + 1),
+  }));
+}
+/**
+ * Extract numeric index from JSON pointer.
+ * @param {string} pointer - e.g., "/nodes/12"
+ * @returns {number}
+ */
+function extractPointerIndex(pointer) {
+  const match = pointer.match(/\/nodes\/(\d+)/);
+  return match ? parseInt(match[1], 10) : 0;
+}
+/**
+ * Format finding ID with zero-padding.
+ * @param {number} num - Finding number (1-based)
+ * @returns {string} e.g., "finding-0001"
+ */
+function formatFindingId(num) {
+  return `finding-${String(num).padStart(4, "0")}`;
+}
+module.exports = { assignFindingIds, formatFindingId };

package/src/rules/document_missing_lang.js ADDED Viewed

@@ -0,0 +1,50 @@
+"use strict";
+const { findElements } = require("../html_parse.js");
+const RULE_ID = "html.document.missing_lang"; // rule_id set on this rule's findings; exported as `id`
+/**
+ * Check for <html> element missing lang attribute.
+ * WCAG 3.1.1 - Language of Page (Level A)
+ */
+function run(nodes, context) {
+  const findings = [];
+  const htmlElements = findElements(nodes, (n) => n.tagName === "html");
+  for (const node of htmlElements) {
+    const lang = node.attrs.lang;
+    if (!lang || lang.trim() === "") {
+      findings.push({
+        rule_id: RULE_ID,
+        severity: "error",
+        confidence: 1.0,
+        message: "Document is missing a lang attribute on the <html> element.",
+        location: {
+          file: context.relativePath,
+          json_pointer: `/nodes/${node.index}`,
+        },
+        evidence: {
+          tagName: node.tagName,
+          attrs: filterAttrs(node.attrs, ["lang"]),
+        },
+      });
+    }
+  }
+  return findings;
+}
+function filterAttrs(attrs, keys) {
+  const filtered = {};
+  for (const key of keys) {
+    if (key in attrs) {
+      filtered[key] = attrs[key];
+    }
+  }
+  return filtered;
+}
+module.exports = { id: RULE_ID, run };

package/src/rules/form_control_missing_label.js ADDED Viewed

@@ -0,0 +1,105 @@
+"use strict";
+const { findElements, getElementById } = require("../html_parse.js");
+const RULE_ID = "html.form_control.missing_label"; // rule_id set on this rule's findings; exported as `id`
+const FORM_CONTROLS = ["input", "select", "textarea"]; // tag names this rule inspects
+const EXEMPT_INPUT_TYPES = ["hidden", "submit", "reset", "button", "image"]; // <input> types the label check skips
+/**
+ * Check for form controls missing associated labels.
+ * WCAG 1.3.1 - Info and Relationships (Level A)
+ * WCAG 4.1.2 - Name, Role, Value (Level A)
+ *
+ * A form control needs one of:
+ * - Associated <label for="...">
+ * - aria-label attribute
+ * - aria-labelledby attribute (pointing to existing element)
+ */
+function run(nodes, context) {
+  const findings = [];
+  // Find all label elements for lookup
+  const labels = findElements(nodes, (n) => n.tagName === "label");
+  const labelForIds = new Set(
+    labels.map((l) => l.attrs.for).filter((f) => f)
+  );
+  // Check form controls
+  const controls = findElements(nodes, (n) => FORM_CONTROLS.includes(n.tagName));
+  for (const node of controls) {
+    // Skip exempt input types
+    if (node.tagName === "input") {
+      const type = (node.attrs.type || "text").toLowerCase();
+      if (EXEMPT_INPUT_TYPES.includes(type)) {
+        continue;
+      }
+    }
+    if (!hasAccessibleLabel(node, labelForIds, nodes)) {
+      findings.push({
+        rule_id: RULE_ID,
+        severity: "error",
+        confidence: 0.95,
+        message: `Form control <${node.tagName}> is missing an associated label.`,
+        location: {
+          file: context.relativePath,
+          json_pointer: `/nodes/${node.index}`,
+        },
+        evidence: {
+          tagName: node.tagName,
+          attrs: filterAttrs(node.attrs, [
+            "id",
+            "name",
+            "type",
+            "aria-label",
+            "aria-labelledby",
+          ]),
+        },
+      });
+    }
+  }
+  return findings;
+}
+/**
+ * Check if a form control has an accessible label.
+ */
+function hasAccessibleLabel(node, labelForIds, nodes) {
+  // Check aria-label
+  if (node.attrs["aria-label"] && node.attrs["aria-label"].trim()) {
+    return true;
+  }
+  // Check aria-labelledby (must point to existing element)
+  if (node.attrs["aria-labelledby"]) {
+    const ids = node.attrs["aria-labelledby"].split(/\s+/);
+    for (const id of ids) {
+      if (getElementById(nodes, id)) {
+        return true;
+      }
+    }
+  }
+  // Check for associated label via 'for' attribute
+  if (node.attrs.id && labelForIds.has(node.attrs.id)) {
+    return true;
+  }
+  return false;
+}
+function filterAttrs(attrs, keys) {
+  const filtered = {};
+  for (const key of keys) {
+    if (key in attrs) {
+      filtered[key] = attrs[key];
+    }
+  }
+  return filtered;
+}
+module.exports = { id: RULE_ID, run };

package/src/rules/img_missing_alt.js ADDED Viewed

@@ -0,0 +1,77 @@
+"use strict";
+const { findElements } = require("../html_parse.js");
+const RULE_ID = "html.img.missing_alt"; // rule_id set on this rule's findings; exported as `id`
+/**
+ * Check for <img> elements missing alt attribute.
+ * WCAG 1.1.1 - Non-text Content (Level A)
+ *
+ * Exceptions:
+ * - role="presentation" or role="none"
+ * - aria-hidden="true"
+ */
+function run(nodes, context) {
+  const findings = [];
+  const imgElements = findElements(nodes, (n) => n.tagName === "img");
+  for (const node of imgElements) {
+    // Skip decorative images
+    if (isDecorativeImage(node)) {
+      continue;
+    }
+    const alt = node.attrs.alt;
+    // Missing alt attribute entirely
+    if (alt === undefined) {
+      findings.push({
+        rule_id: RULE_ID,
+        severity: "error",
+        confidence: 0.98,
+        message: "Image element is missing alt text.",
+        location: {
+          file: context.relativePath,
+          json_pointer: `/nodes/${node.index}`,
+        },
+        evidence: {
+          tagName: node.tagName,
+          attrs: filterAttrs(node.attrs, ["src", "alt", "role", "aria-hidden"]),
+        },
+      });
+    }
+  }
+  return findings;
+}
+/**
+ * Check if image is marked as decorative.
+ */
+function isDecorativeImage(node) {
+  const role = node.attrs.role;
+  if (role === "presentation" || role === "none") {
+    return true;
+  }
+  const ariaHidden = node.attrs["aria-hidden"];
+  if (ariaHidden === "true") {
+    return true;
+  }
+  return false;
+}
+function filterAttrs(attrs, keys) {
+  const filtered = {};
+  for (const key of keys) {
+    if (key in attrs) {
+      filtered[key] = attrs[key];
+    }
+  }
+  return filtered;
+}
+module.exports = { id: RULE_ID, run };