xml-to-html-converter 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,7 +8,7 @@
8
8
 
9
9
  A zero-dependency Node.js package for converting XML to HTML.
10
10
 
11
- - **`minify(xml)`** strips inter-tag whitespace from prettified XML before parsing. Text content is left untouched
11
+ - **`minify(xml)`** removes prettification whitespace between markup tokens before parsing. Non-whitespace text content and CDATA are left untouched
12
12
  - **`scaffold(xml)`** reads any XML string and returns a nested node tree
13
13
  - **`walk(nodes, visitor)`** traverses the full node tree depth-first, visiting every node
14
14
  - **`render(nodes)`** converts a node tree to an HTML string. Every XML element becomes a `<div>` with `data-tag` and `data-attrs-*` attributes
@@ -27,7 +27,7 @@ npm install xml-to-html-converter
27
27
 
28
28
  ### minify
29
29
 
30
- When your XML comes from a file or an API it is usually indented and line-broken. `minify` strips the whitespace between tags before parsing. Text content is left completely untouched.
30
+ When your XML comes from a file or an API it is usually indented and line-broken. `minify` removes whitespace-only text nodes that include line breaks when they appear between markup tokens. All other text content and CDATA are left completely untouched.
31
31
 
32
32
  ```js
33
33
  import { minify } from "xml-to-html-converter";
@@ -42,7 +42,7 @@ const clean = minify(`
42
42
  // <bookstore><book category="cooking"><title lang="en">Everyday Italian</title></book></bookstore>
43
43
  ```
44
44
 
45
- `minify` is opt-in. Skip it if whitespace inside your content is meaningful.
45
+ `minify` is opt-in. Skip it if whitespace-only nodes between markup tokens are meaningful to your use case.
46
46
 
47
47
  ---
48
48
 
@@ -233,7 +233,7 @@ Eight cases are handled:
233
233
  - **Unclosed tags** - opens but never closes, gets `malformed: true`, children are still collected
234
234
  - **Stray closing tags** - a `</tag>` with no matching open surfaces as a `closeTag` token with `malformed: true`
235
235
  - **Unclosed brackets** - a `<` with no matching `>` captures the remainder as a malformed token
236
- - **Unquoted attributes** - `<tag attr=unquoted>` flags the node `malformed: true`, any valid attributes parsed before the error are preserved
236
+ - **Malformed attributes** - unquoted values (`<tag attr=unquoted>`), invalid separators (`<tag a="1"b="2">`), trailing junk after valid attributes (`<tag a="1" junk>`), and unclosed quoted values all flag the node `malformed: true`; any valid attributes parsed before the error are preserved
237
237
  - **Unclosed processing instructions** - `<?xml ...` with no `?>` captures the remainder as a malformed token
238
238
  - **Unclosed comments** - `<!-- ...` with no `-->` captures the remainder as a malformed token
239
239
  - **Unclosed CDATA** - `<![CDATA[ ...` with no `]]>` captures the remainder as a malformed token
package/dist/index.cjs CHANGED
@@ -30,7 +30,71 @@ module.exports = __toCommonJS(src_exports);
30
30
 
31
31
  // src/modules/minify/minify.ts
32
32
  function minify(xml) {
33
- return xml.replace(/>(\s+)</g, "><").trim();
33
+ const input = xml.trim();
34
+ if (input === "") return "";
35
+ const tokens = tokenize(input);
36
+ return tokens.filter((token, index) => !isRemovableWhitespace(token, tokens, index)).map((token) => token.value).join("");
37
+ }
38
/**
 * Decide whether a token is prettification whitespace that may be dropped.
 *
 * A token qualifies only when all of the following hold:
 *   - it is a "text" token,
 *   - its value consists solely of XML whitespace (space, tab, CR, LF),
 *   - its value contains at least one line break (CR or LF),
 *   - both of its direct neighbours are "markup" tokens.
 *
 * @param {{type: string, value: string}} token - Token under inspection.
 * @param {Array<{type: string, value: string}>} tokens - Full token list.
 * @param {number} index - Position of `token` inside `tokens`.
 * @returns {boolean} True when the token can be removed.
 */
function isRemovableWhitespace(token, tokens, index) {
  if (token.type !== "text") return false;
  // XML whitespace is only space, tab, CR and LF. JavaScript's \s would also
  // match characters such as NBSP (U+00A0), which are real text content and
  // must survive minification.
  if (!/^[ \t\r\n]+$/.test(token.value)) return false;
  // Whitespace without a line break (e.g. a single space between inline
  // elements) is kept.
  if (!/[\r\n]/.test(token.value)) return false;
  // A missing neighbour (first/last token) yields undefined, which is not "markup".
  return tokens[index - 1]?.type === "markup" && tokens[index + 1]?.type === "markup";
}
46
/**
 * Split an XML string into an ordered list of tokens.
 *
 * Every run that starts with "<" becomes a "markup" token whose extent is
 * decided by `findMarkupEnd`; everything up to the next "<" (or the end of the
 * input) becomes a "text" token. Concatenating all token values reproduces the
 * input.
 *
 * @param {string} xml - XML source to split.
 * @returns {Array<{type: string, value: string}>} Tokens in document order.
 */
function tokenize(xml) {
  const tokens = [];
  let cursor = 0;
  while (cursor < xml.length) {
    let type;
    let stop;
    if (xml[cursor] === "<") {
      type = "markup";
      stop = findMarkupEnd(xml, cursor);
    } else {
      type = "text";
      const nextOpen = xml.indexOf("<", cursor);
      // No further "<" means the text runs to the end of the input.
      stop = nextOpen === -1 ? xml.length : nextOpen;
    }
    tokens.push({ type, value: xml.slice(cursor, stop) });
    cursor = stop;
  }
  return tokens;
}
63
/**
 * Find the index just past the markup construct that begins at `start`.
 *
 * Handles comments, CDATA sections, processing instructions, DOCTYPE
 * declarations (with or without an internal subset) and ordinary tags.
 * Whenever the construct is never terminated, the end of the input is
 * returned so the remainder is treated as one markup token.
 *
 * @param {string} xml - Full XML source.
 * @param {number} start - Index of the "<" that opens the construct.
 * @returns {number} Exclusive end index of the construct.
 */
function findMarkupEnd(xml, start) {
  // Exclusive end of the first `terminator` at or after `from`, or the input
  // length when the terminator never occurs.
  const endAfter = (terminator, from) => {
    const at = xml.indexOf(terminator, from);
    return at === -1 ? xml.length : at + terminator.length;
  };

  if (xml.startsWith("<!--", start)) return endAfter("-->", start + 4);
  if (xml.startsWith("<![CDATA[", start)) return endAfter("]]>", start + 9);
  if (xml.startsWith("<?", start)) return endAfter("?>", start + 2);

  if (xml.startsWith("<!DOCTYPE", start)) {
    const bracketOpen = xml.indexOf("[", start);
    const firstClose = xml.indexOf(">", start);
    // A "[" before the first ">" means the declaration has an internal subset.
    if (bracketOpen !== -1 && bracketOpen < firstClose) {
      // XML permits whitespace between the subset's closing "]" and the final
      // ">", so "] >" must terminate the declaration just like "]>".
      const subsetEnd = /\][ \t\r\n]*>/g;
      subsetEnd.lastIndex = bracketOpen + 1;
      const match = subsetEnd.exec(xml);
      return match === null ? xml.length : match.index + match[0].length;
    }
    return firstClose === -1 ? xml.length : firstClose + 1;
  }

  // Ordinary tag: the first ">" outside a quoted attribute value ends it.
  let i = start + 1;
  while (i < xml.length) {
    const ch = xml[i];
    if (ch === ">") return i + 1;
    if (ch === '"' || ch === "'") {
      const closeQuote = xml.indexOf(ch, i + 1);
      // An unterminated quote swallows the rest of the input.
      if (closeQuote === -1) return xml.length;
      i = closeQuote + 1;
    } else {
      i++;
    }
  }
  return xml.length;
}
35
99
 
36
100
  // src/modules/render/render.ts
@@ -61,16 +125,29 @@ function buildDataAttrs(node) {
61
125
  function parseXmlAttributes(xmlInner) {
62
126
  const attributes = [];
63
127
  let i = 0;
128
+ let malformed = false;
64
129
  const s = xmlInner.trim();
65
130
  while (i < s.length) {
131
+ const whitespaceStart = i;
66
132
  while (i < s.length && /\s/.test(s[i])) i++;
133
+ const hasSeparatorWhitespace = i > whitespaceStart;
67
134
  if (i >= s.length) break;
135
+ if (attributes.length > 0 && !hasSeparatorWhitespace) {
136
+ malformed = true;
137
+ break;
138
+ }
68
139
  const nameStart = i;
69
140
  while (i < s.length && s[i] !== "=" && !/\s/.test(s[i])) i++;
70
141
  const name = s.slice(nameStart, i).trim();
71
- if (!name) break;
142
+ if (!name) {
143
+ malformed = true;
144
+ break;
145
+ }
72
146
  while (i < s.length && /\s/.test(s[i])) i++;
73
- if (s[i] !== "=") break;
147
+ if (s[i] !== "=") {
148
+ malformed = true;
149
+ break;
150
+ }
74
151
  i++;
75
152
  while (i < s.length && /\s/.test(s[i])) i++;
76
153
  const quote = s[i];
@@ -83,13 +160,19 @@ function parseXmlAttributes(xmlInner) {
83
160
  i++;
84
161
  const valueStart = i;
85
162
  while (i < s.length && s[i] !== quote) i++;
163
+ if (i >= s.length) {
164
+ return {
165
+ attributes: attributes.length > 0 ? attributes : void 0,
166
+ malformed: true
167
+ };
168
+ }
86
169
  const value = s.slice(valueStart, i);
87
170
  i++;
88
171
  attributes.push({ name, value });
89
172
  }
90
173
  return {
91
174
  attributes: attributes.length > 0 ? attributes : void 0,
92
- malformed: false
175
+ malformed
93
176
  };
94
177
  }
95
178
  var MAX_DEPTH = 500;
@@ -204,7 +287,13 @@ function extractXmlNodes(xml, position) {
204
287
  }
205
288
  if (xml[position + 1] === "!" && xml[position + 2] === "[") {
206
289
  const end2 = xml.indexOf("]]>", position + 3);
207
- return end2 === -1 ? { raw: xml.slice(position), role: "textLeaf", tag: "", end: xml.length, malformed: true } : {
290
+ return end2 === -1 ? {
291
+ raw: xml.slice(position),
292
+ role: "textLeaf",
293
+ tag: "",
294
+ end: xml.length,
295
+ malformed: true
296
+ } : {
208
297
  raw: xml.slice(position, end2 + 3),
209
298
  role: "textLeaf",
210
299
  tag: "",
@@ -213,7 +302,13 @@ function extractXmlNodes(xml, position) {
213
302
  }
214
303
  if (xml[position + 1] === "!" && xml[position + 2] === "-" && xml[position + 3] === "-") {
215
304
  const end2 = xml.indexOf("-->", position + 4);
216
- return end2 === -1 ? { raw: xml.slice(position), role: "comment", tag: "", end: xml.length, malformed: true } : {
305
+ return end2 === -1 ? {
306
+ raw: xml.slice(position),
307
+ role: "comment",
308
+ tag: "",
309
+ end: xml.length,
310
+ malformed: true
311
+ } : {
217
312
  raw: xml.slice(position, end2 + 3),
218
313
  role: "comment",
219
314
  tag: "",
@@ -254,12 +349,28 @@ function extractXmlNodes(xml, position) {
254
349
  const tag2 = trimmed.split(/\s/)[0] ?? "";
255
350
  const xmlInner2 = trimmed.slice(tag2.length).trim() || void 0;
256
351
  const parsed2 = xmlInner2 ? parseXmlAttributes(xmlInner2) : void 0;
257
- return { raw, role: "selfTag", tag: tag2, xmlInner: xmlInner2, xmlAttributes: parsed2?.attributes, end, malformed: parsed2?.malformed ? true : void 0 };
352
+ return {
353
+ raw,
354
+ role: "selfTag",
355
+ tag: tag2,
356
+ xmlInner: xmlInner2,
357
+ xmlAttributes: parsed2?.attributes,
358
+ end,
359
+ malformed: parsed2?.malformed ? true : void 0
360
+ };
258
361
  }
259
362
  const tag = inner.split(/\s/)[0] ?? "";
260
363
  const xmlInner = inner.slice(tag.length).trim() || void 0;
261
364
  const parsed = xmlInner ? parseXmlAttributes(xmlInner) : void 0;
262
- return { raw, role: "openTag", tag, xmlInner, xmlAttributes: parsed?.attributes, end, malformed: parsed?.malformed ? true : void 0 };
365
+ return {
366
+ raw,
367
+ role: "openTag",
368
+ tag,
369
+ xmlInner,
370
+ xmlAttributes: parsed?.attributes,
371
+ end,
372
+ malformed: parsed?.malformed ? true : void 0
373
+ };
263
374
  }
264
375
 
265
376
  // src/modules/scaffold/types.ts
package/dist/index.js CHANGED
@@ -1,6 +1,70 @@
1
1
  // src/modules/minify/minify.ts
2
2
  function minify(xml) {
3
- return xml.replace(/>(\s+)</g, "><").trim();
3
+ const input = xml.trim();
4
+ if (input === "") return "";
5
+ const tokens = tokenize(input);
6
+ return tokens.filter((token, index) => !isRemovableWhitespace(token, tokens, index)).map((token) => token.value).join("");
7
+ }
8
/**
 * Decide whether a token is prettification whitespace that may be dropped.
 *
 * A token qualifies only when all of the following hold:
 *   - it is a "text" token,
 *   - its value consists solely of XML whitespace (space, tab, CR, LF),
 *   - its value contains at least one line break (CR or LF),
 *   - both of its direct neighbours are "markup" tokens.
 *
 * @param {{type: string, value: string}} token - Token under inspection.
 * @param {Array<{type: string, value: string}>} tokens - Full token list.
 * @param {number} index - Position of `token` inside `tokens`.
 * @returns {boolean} True when the token can be removed.
 */
function isRemovableWhitespace(token, tokens, index) {
  if (token.type !== "text") return false;
  // XML whitespace is only space, tab, CR and LF. JavaScript's \s would also
  // match characters such as NBSP (U+00A0), which are real text content and
  // must survive minification.
  if (!/^[ \t\r\n]+$/.test(token.value)) return false;
  // Whitespace without a line break (e.g. a single space between inline
  // elements) is kept.
  if (!/[\r\n]/.test(token.value)) return false;
  // A missing neighbour (first/last token) yields undefined, which is not "markup".
  return tokens[index - 1]?.type === "markup" && tokens[index + 1]?.type === "markup";
}
16
/**
 * Split an XML string into an ordered list of tokens.
 *
 * Every run that starts with "<" becomes a "markup" token whose extent is
 * decided by `findMarkupEnd`; everything up to the next "<" (or the end of the
 * input) becomes a "text" token. Concatenating all token values reproduces the
 * input.
 *
 * @param {string} xml - XML source to split.
 * @returns {Array<{type: string, value: string}>} Tokens in document order.
 */
function tokenize(xml) {
  const tokens = [];
  let cursor = 0;
  while (cursor < xml.length) {
    let type;
    let stop;
    if (xml[cursor] === "<") {
      type = "markup";
      stop = findMarkupEnd(xml, cursor);
    } else {
      type = "text";
      const nextOpen = xml.indexOf("<", cursor);
      // No further "<" means the text runs to the end of the input.
      stop = nextOpen === -1 ? xml.length : nextOpen;
    }
    tokens.push({ type, value: xml.slice(cursor, stop) });
    cursor = stop;
  }
  return tokens;
}
33
/**
 * Find the index just past the markup construct that begins at `start`.
 *
 * Handles comments, CDATA sections, processing instructions, DOCTYPE
 * declarations (with or without an internal subset) and ordinary tags.
 * Whenever the construct is never terminated, the end of the input is
 * returned so the remainder is treated as one markup token.
 *
 * @param {string} xml - Full XML source.
 * @param {number} start - Index of the "<" that opens the construct.
 * @returns {number} Exclusive end index of the construct.
 */
function findMarkupEnd(xml, start) {
  // Exclusive end of the first `terminator` at or after `from`, or the input
  // length when the terminator never occurs.
  const endAfter = (terminator, from) => {
    const at = xml.indexOf(terminator, from);
    return at === -1 ? xml.length : at + terminator.length;
  };

  if (xml.startsWith("<!--", start)) return endAfter("-->", start + 4);
  if (xml.startsWith("<![CDATA[", start)) return endAfter("]]>", start + 9);
  if (xml.startsWith("<?", start)) return endAfter("?>", start + 2);

  if (xml.startsWith("<!DOCTYPE", start)) {
    const bracketOpen = xml.indexOf("[", start);
    const firstClose = xml.indexOf(">", start);
    // A "[" before the first ">" means the declaration has an internal subset.
    if (bracketOpen !== -1 && bracketOpen < firstClose) {
      // XML permits whitespace between the subset's closing "]" and the final
      // ">", so "] >" must terminate the declaration just like "]>".
      const subsetEnd = /\][ \t\r\n]*>/g;
      subsetEnd.lastIndex = bracketOpen + 1;
      const match = subsetEnd.exec(xml);
      return match === null ? xml.length : match.index + match[0].length;
    }
    return firstClose === -1 ? xml.length : firstClose + 1;
  }

  // Ordinary tag: the first ">" outside a quoted attribute value ends it.
  let i = start + 1;
  while (i < xml.length) {
    const ch = xml[i];
    if (ch === ">") return i + 1;
    if (ch === '"' || ch === "'") {
      const closeQuote = xml.indexOf(ch, i + 1);
      // An unterminated quote swallows the rest of the input.
      if (closeQuote === -1) return xml.length;
      i = closeQuote + 1;
    } else {
      i++;
    }
  }
  return xml.length;
}
5
69
 
6
70
  // src/modules/render/render.ts
@@ -31,16 +95,29 @@ function buildDataAttrs(node) {
31
95
  function parseXmlAttributes(xmlInner) {
32
96
  const attributes = [];
33
97
  let i = 0;
98
+ let malformed = false;
34
99
  const s = xmlInner.trim();
35
100
  while (i < s.length) {
101
+ const whitespaceStart = i;
36
102
  while (i < s.length && /\s/.test(s[i])) i++;
103
+ const hasSeparatorWhitespace = i > whitespaceStart;
37
104
  if (i >= s.length) break;
105
+ if (attributes.length > 0 && !hasSeparatorWhitespace) {
106
+ malformed = true;
107
+ break;
108
+ }
38
109
  const nameStart = i;
39
110
  while (i < s.length && s[i] !== "=" && !/\s/.test(s[i])) i++;
40
111
  const name = s.slice(nameStart, i).trim();
41
- if (!name) break;
112
+ if (!name) {
113
+ malformed = true;
114
+ break;
115
+ }
42
116
  while (i < s.length && /\s/.test(s[i])) i++;
43
- if (s[i] !== "=") break;
117
+ if (s[i] !== "=") {
118
+ malformed = true;
119
+ break;
120
+ }
44
121
  i++;
45
122
  while (i < s.length && /\s/.test(s[i])) i++;
46
123
  const quote = s[i];
@@ -53,13 +130,19 @@ function parseXmlAttributes(xmlInner) {
53
130
  i++;
54
131
  const valueStart = i;
55
132
  while (i < s.length && s[i] !== quote) i++;
133
+ if (i >= s.length) {
134
+ return {
135
+ attributes: attributes.length > 0 ? attributes : void 0,
136
+ malformed: true
137
+ };
138
+ }
56
139
  const value = s.slice(valueStart, i);
57
140
  i++;
58
141
  attributes.push({ name, value });
59
142
  }
60
143
  return {
61
144
  attributes: attributes.length > 0 ? attributes : void 0,
62
- malformed: false
145
+ malformed
63
146
  };
64
147
  }
65
148
  var MAX_DEPTH = 500;
@@ -174,7 +257,13 @@ function extractXmlNodes(xml, position) {
174
257
  }
175
258
  if (xml[position + 1] === "!" && xml[position + 2] === "[") {
176
259
  const end2 = xml.indexOf("]]>", position + 3);
177
- return end2 === -1 ? { raw: xml.slice(position), role: "textLeaf", tag: "", end: xml.length, malformed: true } : {
260
+ return end2 === -1 ? {
261
+ raw: xml.slice(position),
262
+ role: "textLeaf",
263
+ tag: "",
264
+ end: xml.length,
265
+ malformed: true
266
+ } : {
178
267
  raw: xml.slice(position, end2 + 3),
179
268
  role: "textLeaf",
180
269
  tag: "",
@@ -183,7 +272,13 @@ function extractXmlNodes(xml, position) {
183
272
  }
184
273
  if (xml[position + 1] === "!" && xml[position + 2] === "-" && xml[position + 3] === "-") {
185
274
  const end2 = xml.indexOf("-->", position + 4);
186
- return end2 === -1 ? { raw: xml.slice(position), role: "comment", tag: "", end: xml.length, malformed: true } : {
275
+ return end2 === -1 ? {
276
+ raw: xml.slice(position),
277
+ role: "comment",
278
+ tag: "",
279
+ end: xml.length,
280
+ malformed: true
281
+ } : {
187
282
  raw: xml.slice(position, end2 + 3),
188
283
  role: "comment",
189
284
  tag: "",
@@ -224,12 +319,28 @@ function extractXmlNodes(xml, position) {
224
319
  const tag2 = trimmed.split(/\s/)[0] ?? "";
225
320
  const xmlInner2 = trimmed.slice(tag2.length).trim() || void 0;
226
321
  const parsed2 = xmlInner2 ? parseXmlAttributes(xmlInner2) : void 0;
227
- return { raw, role: "selfTag", tag: tag2, xmlInner: xmlInner2, xmlAttributes: parsed2?.attributes, end, malformed: parsed2?.malformed ? true : void 0 };
322
+ return {
323
+ raw,
324
+ role: "selfTag",
325
+ tag: tag2,
326
+ xmlInner: xmlInner2,
327
+ xmlAttributes: parsed2?.attributes,
328
+ end,
329
+ malformed: parsed2?.malformed ? true : void 0
330
+ };
228
331
  }
229
332
  const tag = inner.split(/\s/)[0] ?? "";
230
333
  const xmlInner = inner.slice(tag.length).trim() || void 0;
231
334
  const parsed = xmlInner ? parseXmlAttributes(xmlInner) : void 0;
232
- return { raw, role: "openTag", tag, xmlInner, xmlAttributes: parsed?.attributes, end, malformed: parsed?.malformed ? true : void 0 };
335
+ return {
336
+ raw,
337
+ role: "openTag",
338
+ tag,
339
+ xmlInner,
340
+ xmlAttributes: parsed?.attributes,
341
+ end,
342
+ malformed: parsed?.malformed ? true : void 0
343
+ };
233
344
  }
234
345
 
235
346
  // src/modules/scaffold/types.ts
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "xml-to-html-converter",
3
- "version": "0.4.1",
3
+ "version": "0.4.2",
4
4
  "description": "Zero dependency XML to HTML converter for Node environments",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",