npm - xml-to-html-converter - Versions diffs - 0.4.0 → 0.4.2 - Mend

xml-to-html-converter 0.4.0 → 0.4.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/README.md CHANGED Viewed

@@ -8,7 +8,7 @@
 A zero-dependency Node.js package for converting XML to HTML.
-- **`minify(xml)`** strips inter-tag whitespace from prettified XML before parsing. Text content is left untouched
+- **`minify(xml)`** removes prettification whitespace between markup tokens before parsing. Non-whitespace text content and CDATA are left untouched
 - **`scaffold(xml)`** reads any XML string and returns a nested node tree
 - **`walk(nodes, visitor)`** traverses the full node tree depth-first, visiting every node
 - **`render(nodes)`** converts a node tree to an HTML string. Every XML element becomes a `<div>` with `data-tag` and `data-attrs-*` attributes
@@ -27,7 +27,7 @@ npm install xml-to-html-converter
 ### minify
-When your XML comes from a file or an API it is usually indented and line-broken. `minify` strips the whitespace between tags before parsing. Text content is left completely untouched.
+When your XML comes from a file or an API it is usually indented and line-broken. `minify` removes whitespace-only text nodes that include line breaks when they appear between markup tokens. Text content and CDATA are left completely untouched.
 ```js
 import { minify } from "xml-to-html-converter";
@@ -42,7 +42,7 @@ const clean = minify(`
 // <bookstore><book category="cooking"><title lang="en">Everyday Italian</title></book></bookstore>
 ```
-`minify` is opt-in. Skip it if whitespace inside your content is meaningful.
+`minify` is opt-in. Skip it if whitespace-only nodes between markup tokens are meaningful to your use case.
 ---
@@ -172,7 +172,7 @@ const html = render(
 </div>
 ```
-Processing instructions and doctypes are dropped. Comments are passed through unchanged.
+Processing instructions and doctypes are dropped. Comments are passed through unchanged. The output is a raw HTML string — if you are inserting it into a web page, treat it accordingly.
 ---
@@ -228,11 +228,15 @@ Every node in the tree has the following fields:
 `scaffold` never throws. No matter what the input looks like, it always returns a complete tree. Malformed structures are flagged with `malformed: true` in place and the walk continues.
-Four cases are handled:
+Eight cases are handled:
 - **Unclosed tags** - opens but never closes, gets `malformed: true`, children are still collected
 - **Stray closing tags** - a `</tag>` with no matching open surfaces as a `closeTag` token with `malformed: true`
 - **Unclosed brackets** - a `<` with no matching `>` captures the remainder as a malformed token
+- **Malformed attributes** - unquoted values (`<tag attr=unquoted>`), invalid separators (`<tag a="1"b="2">`), trailing junk after valid attributes (`<tag a="1" junk>`), and unclosed quoted values all flag the node `malformed: true`; any valid attributes parsed before the error are preserved
+- **Unclosed processing instructions** - `<?xml ...` with no `?>` captures the remainder as a malformed token
+- **Unclosed comments** - `<!-- ...` with no `-->` captures the remainder as a malformed token
+- **Unclosed CDATA** - `<![CDATA[ ...` with no `]]>` captures the remainder as a malformed token
 - **Excessive nesting** - documents nested beyond 500 levels have the deepest open tag flagged `malformed: true` to prevent a stack overflow
 ```js

package/dist/index.cjs CHANGED Viewed

@@ -30,7 +30,71 @@ module.exports = __toCommonJS(src_exports);
 // src/modules/minify/minify.ts
 function minify(xml) {
-  return xml.replace(/>(\s+)</g, (_, gap) => gap.trim() === "" ? "><" : `>${gap}<`).trim();
+  const input = xml.trim();
+  if (input === "") return "";
+  const tokens = tokenize(input);
+  return tokens.filter((token, index) => !isRemovableWhitespace(token, tokens, index)).map((token) => token.value).join("");
+}
+function isRemovableWhitespace(token, tokens, index) {
+  if (token.type !== "text") return false;
+  if (!/^\s+$/.test(token.value)) return false;
+  if (!token.value.includes("\n") && !token.value.includes("\r")) return false;
+  const previous = tokens[index - 1];
+  const next = tokens[index + 1];
+  return previous?.type === "markup" && next?.type === "markup";
+}
+function tokenize(xml) {
+  const tokens = [];
+  let position = 0;
+  while (position < xml.length) {
+    if (xml[position] !== "<") {
+      const nextMarkup = xml.indexOf("<", position);
+      const end2 = nextMarkup === -1 ? xml.length : nextMarkup;
+      tokens.push({ type: "text", value: xml.slice(position, end2) });
+      position = end2;
+      continue;
+    }
+    const end = findMarkupEnd(xml, position);
+    tokens.push({ type: "markup", value: xml.slice(position, end) });
+    position = end;
+  }
+  return tokens;
+}
+function findMarkupEnd(xml, start) {
+  if (xml.startsWith("<!--", start)) {
+    const end = xml.indexOf("-->", start + 4);
+    return end === -1 ? xml.length : end + 3;
+  }
+  if (xml.startsWith("<![CDATA[", start)) {
+    const end = xml.indexOf("]]>", start + 9);
+    return end === -1 ? xml.length : end + 3;
+  }
+  if (xml.startsWith("<?", start)) {
+    const end = xml.indexOf("?>", start + 2);
+    return end === -1 ? xml.length : end + 2;
+  }
+  if (xml.startsWith("<!DOCTYPE", start)) {
+    const bracketOpen = xml.indexOf("[", start);
+    const firstClose = xml.indexOf(">", start);
+    if (bracketOpen !== -1 && bracketOpen < firstClose) {
+      const bracketClose = xml.indexOf("]>", bracketOpen + 1);
+      return bracketClose === -1 ? xml.length : bracketClose + 2;
+    }
+    return firstClose === -1 ? xml.length : firstClose + 1;
+  }
+  let i = start + 1;
+  while (i < xml.length) {
+    const ch = xml[i];
+    if (ch === '"' || ch === "'") {
+      const closeQuote = xml.indexOf(ch, i + 1);
+      if (closeQuote === -1) return xml.length;
+      i = closeQuote + 1;
+      continue;
+    }
+    if (ch === ">") return i + 1;
+    i++;
+  }
+  return xml.length;
 }
 // src/modules/render/render.ts
@@ -61,28 +125,55 @@ function buildDataAttrs(node) {
 function parseXmlAttributes(xmlInner) {
   const attributes = [];
   let i = 0;
+  let malformed = false;
   const s = xmlInner.trim();
   while (i < s.length) {
+    const whitespaceStart = i;
     while (i < s.length && /\s/.test(s[i])) i++;
+    const hasSeparatorWhitespace = i > whitespaceStart;
     if (i >= s.length) break;
+    if (attributes.length > 0 && !hasSeparatorWhitespace) {
+      malformed = true;
+      break;
+    }
     const nameStart = i;
     while (i < s.length && s[i] !== "=" && !/\s/.test(s[i])) i++;
     const name = s.slice(nameStart, i).trim();
-    if (!name) break;
+    if (!name) {
+      malformed = true;
+      break;
+    }
     while (i < s.length && /\s/.test(s[i])) i++;
-    if (s[i] !== "=") break;
+    if (s[i] !== "=") {
+      malformed = true;
+      break;
+    }
     i++;
     while (i < s.length && /\s/.test(s[i])) i++;
     const quote = s[i];
-    if (quote !== '"' && quote !== "'") break;
+    if (quote !== '"' && quote !== "'") {
+      return {
+        attributes: attributes.length > 0 ? attributes : void 0,
+        malformed: true
+      };
+    }
     i++;
     const valueStart = i;
     while (i < s.length && s[i] !== quote) i++;
+    if (i >= s.length) {
+      return {
+        attributes: attributes.length > 0 ? attributes : void 0,
+        malformed: true
+      };
+    }
     const value = s.slice(valueStart, i);
     i++;
     attributes.push({ name, value });
   }
-  return attributes.length > 0 ? attributes : void 0;
+  return {
+    attributes: attributes.length > 0 ? attributes : void 0,
+    malformed
+  };
 }
 var MAX_DEPTH = 500;
 function scaffold(xml) {
@@ -185,7 +276,8 @@ function extractXmlNodes(xml, position) {
       raw: xml.slice(position),
       role: "processingInstruction",
       tag: "",
-      end: xml.length
+      end: xml.length,
+      malformed: true
     } : {
       raw: xml.slice(position, end2 + 2),
       role: "processingInstruction",
@@ -195,7 +287,13 @@ function extractXmlNodes(xml, position) {
   }
   if (xml[position + 1] === "!" && xml[position + 2] === "[") {
     const end2 = xml.indexOf("]]>", position + 3);
-    return end2 === -1 ? { raw: xml.slice(position), role: "textLeaf", tag: "", end: xml.length } : {
+    return end2 === -1 ? {
+      raw: xml.slice(position),
+      role: "textLeaf",
+      tag: "",
+      end: xml.length,
+      malformed: true
+    } : {
       raw: xml.slice(position, end2 + 3),
       role: "textLeaf",
       tag: "",
@@ -204,7 +302,13 @@ function extractXmlNodes(xml, position) {
   }
   if (xml[position + 1] === "!" && xml[position + 2] === "-" && xml[position + 3] === "-") {
     const end2 = xml.indexOf("-->", position + 4);
-    return end2 === -1 ? { raw: xml.slice(position), role: "comment", tag: "", end: xml.length } : {
+    return end2 === -1 ? {
+      raw: xml.slice(position),
+      role: "comment",
+      tag: "",
+      end: xml.length,
+      malformed: true
+    } : {
       raw: xml.slice(position, end2 + 3),
       role: "comment",
       tag: "",
@@ -244,13 +348,29 @@ function extractXmlNodes(xml, position) {
     const trimmed = inner.slice(0, -1).trim();
     const tag2 = trimmed.split(/\s/)[0] ?? "";
     const xmlInner2 = trimmed.slice(tag2.length).trim() || void 0;
-    const xmlAttributes2 = xmlInner2 ? parseXmlAttributes(xmlInner2) : void 0;
-    return { raw, role: "selfTag", tag: tag2, xmlInner: xmlInner2, xmlAttributes: xmlAttributes2, end };
+    const parsed2 = xmlInner2 ? parseXmlAttributes(xmlInner2) : void 0;
+    return {
+      raw,
+      role: "selfTag",
+      tag: tag2,
+      xmlInner: xmlInner2,
+      xmlAttributes: parsed2?.attributes,
+      end,
+      malformed: parsed2?.malformed ? true : void 0
+    };
   }
   const tag = inner.split(/\s/)[0] ?? "";
   const xmlInner = inner.slice(tag.length).trim() || void 0;
-  const xmlAttributes = xmlInner ? parseXmlAttributes(xmlInner) : void 0;
-  return { raw, role: "openTag", tag, xmlInner, xmlAttributes, end };
+  const parsed = xmlInner ? parseXmlAttributes(xmlInner) : void 0;
+  return {
+    raw,
+    role: "openTag",
+    tag,
+    xmlInner,
+    xmlAttributes: parsed?.attributes,
+    end,
+    malformed: parsed?.malformed ? true : void 0
+  };
 }
 // src/modules/scaffold/types.ts

package/dist/index.js CHANGED Viewed

@@ -1,6 +1,70 @@
 // src/modules/minify/minify.ts
 function minify(xml) {
-  return xml.replace(/>(\s+)</g, (_, gap) => gap.trim() === "" ? "><" : `>${gap}<`).trim();
+  const input = xml.trim();
+  if (input === "") return "";
+  const tokens = tokenize(input);
+  return tokens.filter((token, index) => !isRemovableWhitespace(token, tokens, index)).map((token) => token.value).join("");
+}
+function isRemovableWhitespace(token, tokens, index) {
+  if (token.type !== "text") return false;
+  if (!/^\s+$/.test(token.value)) return false;
+  if (!token.value.includes("\n") && !token.value.includes("\r")) return false;
+  const previous = tokens[index - 1];
+  const next = tokens[index + 1];
+  return previous?.type === "markup" && next?.type === "markup";
+}
+function tokenize(xml) {
+  const tokens = [];
+  let position = 0;
+  while (position < xml.length) {
+    if (xml[position] !== "<") {
+      const nextMarkup = xml.indexOf("<", position);
+      const end2 = nextMarkup === -1 ? xml.length : nextMarkup;
+      tokens.push({ type: "text", value: xml.slice(position, end2) });
+      position = end2;
+      continue;
+    }
+    const end = findMarkupEnd(xml, position);
+    tokens.push({ type: "markup", value: xml.slice(position, end) });
+    position = end;
+  }
+  return tokens;
+}
+function findMarkupEnd(xml, start) {
+  if (xml.startsWith("<!--", start)) {
+    const end = xml.indexOf("-->", start + 4);
+    return end === -1 ? xml.length : end + 3;
+  }
+  if (xml.startsWith("<![CDATA[", start)) {
+    const end = xml.indexOf("]]>", start + 9);
+    return end === -1 ? xml.length : end + 3;
+  }
+  if (xml.startsWith("<?", start)) {
+    const end = xml.indexOf("?>", start + 2);
+    return end === -1 ? xml.length : end + 2;
+  }
+  if (xml.startsWith("<!DOCTYPE", start)) {
+    const bracketOpen = xml.indexOf("[", start);
+    const firstClose = xml.indexOf(">", start);
+    if (bracketOpen !== -1 && bracketOpen < firstClose) {
+      const bracketClose = xml.indexOf("]>", bracketOpen + 1);
+      return bracketClose === -1 ? xml.length : bracketClose + 2;
+    }
+    return firstClose === -1 ? xml.length : firstClose + 1;
+  }
+  let i = start + 1;
+  while (i < xml.length) {
+    const ch = xml[i];
+    if (ch === '"' || ch === "'") {
+      const closeQuote = xml.indexOf(ch, i + 1);
+      if (closeQuote === -1) return xml.length;
+      i = closeQuote + 1;
+      continue;
+    }
+    if (ch === ">") return i + 1;
+    i++;
+  }
+  return xml.length;
 }
 // src/modules/render/render.ts
@@ -31,28 +95,55 @@ function buildDataAttrs(node) {
 function parseXmlAttributes(xmlInner) {
   const attributes = [];
   let i = 0;
+  let malformed = false;
   const s = xmlInner.trim();
   while (i < s.length) {
+    const whitespaceStart = i;
     while (i < s.length && /\s/.test(s[i])) i++;
+    const hasSeparatorWhitespace = i > whitespaceStart;
     if (i >= s.length) break;
+    if (attributes.length > 0 && !hasSeparatorWhitespace) {
+      malformed = true;
+      break;
+    }
     const nameStart = i;
     while (i < s.length && s[i] !== "=" && !/\s/.test(s[i])) i++;
     const name = s.slice(nameStart, i).trim();
-    if (!name) break;
+    if (!name) {
+      malformed = true;
+      break;
+    }
     while (i < s.length && /\s/.test(s[i])) i++;
-    if (s[i] !== "=") break;
+    if (s[i] !== "=") {
+      malformed = true;
+      break;
+    }
     i++;
     while (i < s.length && /\s/.test(s[i])) i++;
     const quote = s[i];
-    if (quote !== '"' && quote !== "'") break;
+    if (quote !== '"' && quote !== "'") {
+      return {
+        attributes: attributes.length > 0 ? attributes : void 0,
+        malformed: true
+      };
+    }
     i++;
     const valueStart = i;
     while (i < s.length && s[i] !== quote) i++;
+    if (i >= s.length) {
+      return {
+        attributes: attributes.length > 0 ? attributes : void 0,
+        malformed: true
+      };
+    }
     const value = s.slice(valueStart, i);
     i++;
     attributes.push({ name, value });
   }
-  return attributes.length > 0 ? attributes : void 0;
+  return {
+    attributes: attributes.length > 0 ? attributes : void 0,
+    malformed
+  };
 }
 var MAX_DEPTH = 500;
 function scaffold(xml) {
@@ -155,7 +246,8 @@ function extractXmlNodes(xml, position) {
       raw: xml.slice(position),
       role: "processingInstruction",
       tag: "",
-      end: xml.length
+      end: xml.length,
+      malformed: true
     } : {
       raw: xml.slice(position, end2 + 2),
       role: "processingInstruction",
@@ -165,7 +257,13 @@ function extractXmlNodes(xml, position) {
   }
   if (xml[position + 1] === "!" && xml[position + 2] === "[") {
     const end2 = xml.indexOf("]]>", position + 3);
-    return end2 === -1 ? { raw: xml.slice(position), role: "textLeaf", tag: "", end: xml.length } : {
+    return end2 === -1 ? {
+      raw: xml.slice(position),
+      role: "textLeaf",
+      tag: "",
+      end: xml.length,
+      malformed: true
+    } : {
       raw: xml.slice(position, end2 + 3),
       role: "textLeaf",
       tag: "",
@@ -174,7 +272,13 @@ function extractXmlNodes(xml, position) {
   }
   if (xml[position + 1] === "!" && xml[position + 2] === "-" && xml[position + 3] === "-") {
     const end2 = xml.indexOf("-->", position + 4);
-    return end2 === -1 ? { raw: xml.slice(position), role: "comment", tag: "", end: xml.length } : {
+    return end2 === -1 ? {
+      raw: xml.slice(position),
+      role: "comment",
+      tag: "",
+      end: xml.length,
+      malformed: true
+    } : {
       raw: xml.slice(position, end2 + 3),
       role: "comment",
       tag: "",
@@ -214,13 +318,29 @@ function extractXmlNodes(xml, position) {
     const trimmed = inner.slice(0, -1).trim();
     const tag2 = trimmed.split(/\s/)[0] ?? "";
     const xmlInner2 = trimmed.slice(tag2.length).trim() || void 0;
-    const xmlAttributes2 = xmlInner2 ? parseXmlAttributes(xmlInner2) : void 0;
-    return { raw, role: "selfTag", tag: tag2, xmlInner: xmlInner2, xmlAttributes: xmlAttributes2, end };
+    const parsed2 = xmlInner2 ? parseXmlAttributes(xmlInner2) : void 0;
+    return {
+      raw,
+      role: "selfTag",
+      tag: tag2,
+      xmlInner: xmlInner2,
+      xmlAttributes: parsed2?.attributes,
+      end,
+      malformed: parsed2?.malformed ? true : void 0
+    };
   }
   const tag = inner.split(/\s/)[0] ?? "";
   const xmlInner = inner.slice(tag.length).trim() || void 0;
-  const xmlAttributes = xmlInner ? parseXmlAttributes(xmlInner) : void 0;
-  return { raw, role: "openTag", tag, xmlInner, xmlAttributes, end };
+  const parsed = xmlInner ? parseXmlAttributes(xmlInner) : void 0;
+  return {
+    raw,
+    role: "openTag",
+    tag,
+    xmlInner,
+    xmlAttributes: parsed?.attributes,
+    end,
+    malformed: parsed?.malformed ? true : void 0
+  };
 }
 // src/modules/scaffold/types.ts

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "xml-to-html-converter",
-  "version": "0.4.0",
+  "version": "0.4.2",
   "description": "Zero dependency XML to HTML converter for Node environments",
   "type": "module",
   "main": "./dist/index.cjs",