npm - xml-to-html-converter - Versions diffs - 0.4.1 → 0.4.2 - Mend

xml-to-html-converter 0.4.1 → 0.4.2

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.

Files changed (4) hide show

package/README.md CHANGED Viewed

@@ -8,7 +8,7 @@
 A zero-dependency Node.js package for converting XML to HTML.
-- **`minify(xml)`** strips inter-tag whitespace from prettified XML before parsing. Text content is left untouched
+- **`minify(xml)`** removes prettification whitespace between markup tokens before parsing. Non-whitespace text content and CDATA are left untouched
 - **`scaffold(xml)`** reads any XML string and returns a nested node tree
 - **`walk(nodes, visitor)`** traverses the full node tree depth-first, visiting every node
 - **`render(nodes)`** converts a node tree to an HTML string. Every XML element becomes a `<div>` with `data-tag` and `data-attrs-*` attributes
@@ -27,7 +27,7 @@ npm install xml-to-html-converter
 ### minify
-When your XML comes from a file or an API it is usually indented and line-broken. `minify` strips the whitespace between tags before parsing. Text content is left completely untouched.
+When your XML comes from a file or an API it is usually indented and line-broken. `minify` removes whitespace-only text nodes that include line breaks when they appear between markup tokens. Text content and CDATA are left completely untouched.
 ```js
 import { minify } from "xml-to-html-converter";
@@ -42,7 +42,7 @@ const clean = minify(`
 // <bookstore><book category="cooking"><title lang="en">Everyday Italian</title></book></bookstore>
 ```
-`minify` is opt-in. Skip it if whitespace inside your content is meaningful.
+`minify` is opt-in. Skip it if whitespace-only nodes between markup tokens are meaningful to your use case.
 ---
@@ -233,7 +233,7 @@ Eight cases are handled:
 - **Unclosed tags** - opens but never closes, gets `malformed: true`, children are still collected
 - **Stray closing tags** - a `</tag>` with no matching open surfaces as a `closeTag` token with `malformed: true`
 - **Unclosed brackets** - a `<` with no matching `>` captures the remainder as a malformed token
-- **Unquoted attributes** - `<tag attr=unquoted>` flags the node `malformed: true`, any valid attributes parsed before the error are preserved
+- **Malformed attributes** - unquoted values (`<tag attr=unquoted>`), invalid separators (`<tag a="1"b="2">`), trailing junk after valid attributes (`<tag a="1" junk>`), and unclosed quoted values all flag the node `malformed: true`; any valid attributes parsed before the error are preserved
 - **Unclosed processing instructions** - `<?xml ...` with no `?>` captures the remainder as a malformed token
 - **Unclosed comments** - `<!-- ...` with no `-->` captures the remainder as a malformed token
 - **Unclosed CDATA** - `<![CDATA[ ...` with no `]]>` captures the remainder as a malformed token

package/dist/index.cjs CHANGED Viewed

@@ -30,7 +30,71 @@ module.exports = __toCommonJS(src_exports);
 // src/modules/minify/minify.ts
 function minify(xml) {
-  return xml.replace(/>(\s+)</g, "><").trim();
+  const input = xml.trim();
+  if (input === "") return "";
+  const tokens = tokenize(input);
+  return tokens.filter((token, index) => !isRemovableWhitespace(token, tokens, index)).map((token) => token.value).join("");
+}
+function isRemovableWhitespace(token, tokens, index) {
+  if (token.type !== "text") return false;
+  if (!/^\s+$/.test(token.value)) return false;
+  if (!token.value.includes("\n") && !token.value.includes("\r")) return false;
+  const previous = tokens[index - 1];
+  const next = tokens[index + 1];
+  return previous?.type === "markup" && next?.type === "markup";
+}
+function tokenize(xml) {
+  const tokens = [];
+  let position = 0;
+  while (position < xml.length) {
+    if (xml[position] !== "<") {
+      const nextMarkup = xml.indexOf("<", position);
+      const end2 = nextMarkup === -1 ? xml.length : nextMarkup;
+      tokens.push({ type: "text", value: xml.slice(position, end2) });
+      position = end2;
+      continue;
+    }
+    const end = findMarkupEnd(xml, position);
+    tokens.push({ type: "markup", value: xml.slice(position, end) });
+    position = end;
+  }
+  return tokens;
+}
+function findMarkupEnd(xml, start) {
+  if (xml.startsWith("<!--", start)) {
+    const end = xml.indexOf("-->", start + 4);
+    return end === -1 ? xml.length : end + 3;
+  }
+  if (xml.startsWith("<![CDATA[", start)) {
+    const end = xml.indexOf("]]>", start + 9);
+    return end === -1 ? xml.length : end + 3;
+  }
+  if (xml.startsWith("<?", start)) {
+    const end = xml.indexOf("?>", start + 2);
+    return end === -1 ? xml.length : end + 2;
+  }
+  if (xml.startsWith("<!DOCTYPE", start)) {
+    const bracketOpen = xml.indexOf("[", start);
+    const firstClose = xml.indexOf(">", start);
+    if (bracketOpen !== -1 && bracketOpen < firstClose) {
+      const bracketClose = xml.indexOf("]>", bracketOpen + 1);
+      return bracketClose === -1 ? xml.length : bracketClose + 2;
+    }
+    return firstClose === -1 ? xml.length : firstClose + 1;
+  }
+  let i = start + 1;
+  while (i < xml.length) {
+    const ch = xml[i];
+    if (ch === '"' || ch === "'") {
+      const closeQuote = xml.indexOf(ch, i + 1);
+      if (closeQuote === -1) return xml.length;
+      i = closeQuote + 1;
+      continue;
+    }
+    if (ch === ">") return i + 1;
+    i++;
+  }
+  return xml.length;
 }
 // src/modules/render/render.ts
@@ -61,16 +125,29 @@ function buildDataAttrs(node) {
 function parseXmlAttributes(xmlInner) {
   const attributes = [];
   let i = 0;
+  let malformed = false;
   const s = xmlInner.trim();
   while (i < s.length) {
+    const whitespaceStart = i;
     while (i < s.length && /\s/.test(s[i])) i++;
+    const hasSeparatorWhitespace = i > whitespaceStart;
     if (i >= s.length) break;
+    if (attributes.length > 0 && !hasSeparatorWhitespace) {
+      malformed = true;
+      break;
+    }
     const nameStart = i;
     while (i < s.length && s[i] !== "=" && !/\s/.test(s[i])) i++;
     const name = s.slice(nameStart, i).trim();
-    if (!name) break;
+    if (!name) {
+      malformed = true;
+      break;
+    }
     while (i < s.length && /\s/.test(s[i])) i++;
-    if (s[i] !== "=") break;
+    if (s[i] !== "=") {
+      malformed = true;
+      break;
+    }
     i++;
     while (i < s.length && /\s/.test(s[i])) i++;
     const quote = s[i];
@@ -83,13 +160,19 @@ function parseXmlAttributes(xmlInner) {
     i++;
     const valueStart = i;
     while (i < s.length && s[i] !== quote) i++;
+    if (i >= s.length) {
+      return {
+        attributes: attributes.length > 0 ? attributes : void 0,
+        malformed: true
+      };
+    }
     const value = s.slice(valueStart, i);
     i++;
     attributes.push({ name, value });
   }
   return {
     attributes: attributes.length > 0 ? attributes : void 0,
-    malformed: false
+    malformed
   };
 }
 var MAX_DEPTH = 500;
@@ -204,7 +287,13 @@ function extractXmlNodes(xml, position) {
   }
   if (xml[position + 1] === "!" && xml[position + 2] === "[") {
     const end2 = xml.indexOf("]]>", position + 3);
-    return end2 === -1 ? { raw: xml.slice(position), role: "textLeaf", tag: "", end: xml.length, malformed: true } : {
+    return end2 === -1 ? {
+      raw: xml.slice(position),
+      role: "textLeaf",
+      tag: "",
+      end: xml.length,
+      malformed: true
+    } : {
       raw: xml.slice(position, end2 + 3),
       role: "textLeaf",
       tag: "",
@@ -213,7 +302,13 @@ function extractXmlNodes(xml, position) {
   }
   if (xml[position + 1] === "!" && xml[position + 2] === "-" && xml[position + 3] === "-") {
     const end2 = xml.indexOf("-->", position + 4);
-    return end2 === -1 ? { raw: xml.slice(position), role: "comment", tag: "", end: xml.length, malformed: true } : {
+    return end2 === -1 ? {
+      raw: xml.slice(position),
+      role: "comment",
+      tag: "",
+      end: xml.length,
+      malformed: true
+    } : {
       raw: xml.slice(position, end2 + 3),
       role: "comment",
       tag: "",
@@ -254,12 +349,28 @@ function extractXmlNodes(xml, position) {
     const tag2 = trimmed.split(/\s/)[0] ?? "";
     const xmlInner2 = trimmed.slice(tag2.length).trim() || void 0;
     const parsed2 = xmlInner2 ? parseXmlAttributes(xmlInner2) : void 0;
-    return { raw, role: "selfTag", tag: tag2, xmlInner: xmlInner2, xmlAttributes: parsed2?.attributes, end, malformed: parsed2?.malformed ? true : void 0 };
+    return {
+      raw,
+      role: "selfTag",
+      tag: tag2,
+      xmlInner: xmlInner2,
+      xmlAttributes: parsed2?.attributes,
+      end,
+      malformed: parsed2?.malformed ? true : void 0
+    };
   }
   const tag = inner.split(/\s/)[0] ?? "";
   const xmlInner = inner.slice(tag.length).trim() || void 0;
   const parsed = xmlInner ? parseXmlAttributes(xmlInner) : void 0;
-  return { raw, role: "openTag", tag, xmlInner, xmlAttributes: parsed?.attributes, end, malformed: parsed?.malformed ? true : void 0 };
+  return {
+    raw,
+    role: "openTag",
+    tag,
+    xmlInner,
+    xmlAttributes: parsed?.attributes,
+    end,
+    malformed: parsed?.malformed ? true : void 0
+  };
 }
 // src/modules/scaffold/types.ts

package/dist/index.js CHANGED Viewed

@@ -1,6 +1,70 @@
 // src/modules/minify/minify.ts
 function minify(xml) {
-  return xml.replace(/>(\s+)</g, "><").trim();
+  const input = xml.trim();
+  if (input === "") return "";
+  const tokens = tokenize(input);
+  return tokens.filter((token, index) => !isRemovableWhitespace(token, tokens, index)).map((token) => token.value).join("");
+}
+function isRemovableWhitespace(token, tokens, index) {
+  if (token.type !== "text") return false;
+  if (!/^\s+$/.test(token.value)) return false;
+  if (!token.value.includes("\n") && !token.value.includes("\r")) return false;
+  const previous = tokens[index - 1];
+  const next = tokens[index + 1];
+  return previous?.type === "markup" && next?.type === "markup";
+}
+function tokenize(xml) {
+  const tokens = [];
+  let position = 0;
+  while (position < xml.length) {
+    if (xml[position] !== "<") {
+      const nextMarkup = xml.indexOf("<", position);
+      const end2 = nextMarkup === -1 ? xml.length : nextMarkup;
+      tokens.push({ type: "text", value: xml.slice(position, end2) });
+      position = end2;
+      continue;
+    }
+    const end = findMarkupEnd(xml, position);
+    tokens.push({ type: "markup", value: xml.slice(position, end) });
+    position = end;
+  }
+  return tokens;
+}
+function findMarkupEnd(xml, start) {
+  if (xml.startsWith("<!--", start)) {
+    const end = xml.indexOf("-->", start + 4);
+    return end === -1 ? xml.length : end + 3;
+  }
+  if (xml.startsWith("<![CDATA[", start)) {
+    const end = xml.indexOf("]]>", start + 9);
+    return end === -1 ? xml.length : end + 3;
+  }
+  if (xml.startsWith("<?", start)) {
+    const end = xml.indexOf("?>", start + 2);
+    return end === -1 ? xml.length : end + 2;
+  }
+  if (xml.startsWith("<!DOCTYPE", start)) {
+    const bracketOpen = xml.indexOf("[", start);
+    const firstClose = xml.indexOf(">", start);
+    if (bracketOpen !== -1 && bracketOpen < firstClose) {
+      const bracketClose = xml.indexOf("]>", bracketOpen + 1);
+      return bracketClose === -1 ? xml.length : bracketClose + 2;
+    }
+    return firstClose === -1 ? xml.length : firstClose + 1;
+  }
+  let i = start + 1;
+  while (i < xml.length) {
+    const ch = xml[i];
+    if (ch === '"' || ch === "'") {
+      const closeQuote = xml.indexOf(ch, i + 1);
+      if (closeQuote === -1) return xml.length;
+      i = closeQuote + 1;
+      continue;
+    }
+    if (ch === ">") return i + 1;
+    i++;
+  }
+  return xml.length;
 }
 // src/modules/render/render.ts
@@ -31,16 +95,29 @@ function buildDataAttrs(node) {
 function parseXmlAttributes(xmlInner) {
   const attributes = [];
   let i = 0;
+  let malformed = false;
   const s = xmlInner.trim();
   while (i < s.length) {
+    const whitespaceStart = i;
     while (i < s.length && /\s/.test(s[i])) i++;
+    const hasSeparatorWhitespace = i > whitespaceStart;
     if (i >= s.length) break;
+    if (attributes.length > 0 && !hasSeparatorWhitespace) {
+      malformed = true;
+      break;
+    }
     const nameStart = i;
     while (i < s.length && s[i] !== "=" && !/\s/.test(s[i])) i++;
     const name = s.slice(nameStart, i).trim();
-    if (!name) break;
+    if (!name) {
+      malformed = true;
+      break;
+    }
     while (i < s.length && /\s/.test(s[i])) i++;
-    if (s[i] !== "=") break;
+    if (s[i] !== "=") {
+      malformed = true;
+      break;
+    }
     i++;
     while (i < s.length && /\s/.test(s[i])) i++;
     const quote = s[i];
@@ -53,13 +130,19 @@ function parseXmlAttributes(xmlInner) {
     i++;
     const valueStart = i;
     while (i < s.length && s[i] !== quote) i++;
+    if (i >= s.length) {
+      return {
+        attributes: attributes.length > 0 ? attributes : void 0,
+        malformed: true
+      };
+    }
     const value = s.slice(valueStart, i);
     i++;
     attributes.push({ name, value });
   }
   return {
     attributes: attributes.length > 0 ? attributes : void 0,
-    malformed: false
+    malformed
   };
 }
 var MAX_DEPTH = 500;
@@ -174,7 +257,13 @@ function extractXmlNodes(xml, position) {
   }
   if (xml[position + 1] === "!" && xml[position + 2] === "[") {
     const end2 = xml.indexOf("]]>", position + 3);
-    return end2 === -1 ? { raw: xml.slice(position), role: "textLeaf", tag: "", end: xml.length, malformed: true } : {
+    return end2 === -1 ? {
+      raw: xml.slice(position),
+      role: "textLeaf",
+      tag: "",
+      end: xml.length,
+      malformed: true
+    } : {
       raw: xml.slice(position, end2 + 3),
       role: "textLeaf",
       tag: "",
@@ -183,7 +272,13 @@ function extractXmlNodes(xml, position) {
   }
   if (xml[position + 1] === "!" && xml[position + 2] === "-" && xml[position + 3] === "-") {
     const end2 = xml.indexOf("-->", position + 4);
-    return end2 === -1 ? { raw: xml.slice(position), role: "comment", tag: "", end: xml.length, malformed: true } : {
+    return end2 === -1 ? {
+      raw: xml.slice(position),
+      role: "comment",
+      tag: "",
+      end: xml.length,
+      malformed: true
+    } : {
       raw: xml.slice(position, end2 + 3),
       role: "comment",
       tag: "",
@@ -224,12 +319,28 @@ function extractXmlNodes(xml, position) {
     const tag2 = trimmed.split(/\s/)[0] ?? "";
     const xmlInner2 = trimmed.slice(tag2.length).trim() || void 0;
     const parsed2 = xmlInner2 ? parseXmlAttributes(xmlInner2) : void 0;
-    return { raw, role: "selfTag", tag: tag2, xmlInner: xmlInner2, xmlAttributes: parsed2?.attributes, end, malformed: parsed2?.malformed ? true : void 0 };
+    return {
+      raw,
+      role: "selfTag",
+      tag: tag2,
+      xmlInner: xmlInner2,
+      xmlAttributes: parsed2?.attributes,
+      end,
+      malformed: parsed2?.malformed ? true : void 0
+    };
   }
   const tag = inner.split(/\s/)[0] ?? "";
   const xmlInner = inner.slice(tag.length).trim() || void 0;
   const parsed = xmlInner ? parseXmlAttributes(xmlInner) : void 0;
-  return { raw, role: "openTag", tag, xmlInner, xmlAttributes: parsed?.attributes, end, malformed: parsed?.malformed ? true : void 0 };
+  return {
+    raw,
+    role: "openTag",
+    tag,
+    xmlInner,
+    xmlAttributes: parsed?.attributes,
+    end,
+    malformed: parsed?.malformed ? true : void 0
+  };
 }
 // src/modules/scaffold/types.ts

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "xml-to-html-converter",
-  "version": "0.4.1",
+  "version": "0.4.2",
   "description": "Zero dependency XML to HTML converter for Node environments",
   "type": "module",
   "main": "./dist/index.cjs",