xml-to-html-converter 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,7 +8,7 @@
8
8
 
9
9
  A zero-dependency Node.js package for converting XML to HTML.
10
10
 
11
- - **`minify(xml)`** strips inter-tag whitespace from prettified XML before parsing. Text content is left untouched
11
+ - **`minify(xml)`** removes prettification whitespace between markup tokens before parsing. Non-whitespace text content and CDATA are left untouched
12
12
  - **`scaffold(xml)`** reads any XML string and returns a nested node tree
13
13
  - **`walk(nodes, visitor)`** traverses the full node tree depth-first, visiting every node
14
14
  - **`render(nodes)`** converts a node tree to an HTML string. Every XML element becomes a `<div>` with `data-tag` and `data-attrs-*` attributes
@@ -27,7 +27,7 @@ npm install xml-to-html-converter
27
27
 
28
28
  ### minify
29
29
 
30
- When your XML comes from a file or an API it is usually indented and line-broken. `minify` strips the whitespace between tags before parsing. Text content is left completely untouched.
30
+ When your XML comes from a file or an API it is usually indented and line-broken. `minify` removes whitespace-only text nodes that contain a line break when they sit directly between two markup tokens. Whitespace-only nodes without a line break, all other text content, and CDATA are left completely untouched.
31
31
 
32
32
  ```js
33
33
  import { minify } from "xml-to-html-converter";
@@ -42,7 +42,7 @@ const clean = minify(`
42
42
  // <bookstore><book category="cooking"><title lang="en">Everyday Italian</title></book></bookstore>
43
43
  ```
44
44
 
45
- `minify` is opt-in. Skip it if whitespace inside your content is meaningful.
45
+ `minify` is opt-in. Skip it if whitespace-only nodes between markup tokens are meaningful to your use case.
46
46
 
47
47
  ---
48
48
 
@@ -172,7 +172,7 @@ const html = render(
172
172
  </div>
173
173
  ```
174
174
 
175
- Processing instructions and doctypes are dropped. Comments are passed through unchanged.
175
+ Processing instructions and doctypes are dropped. Comments are passed through unchanged. The output is a raw HTML string — if you insert it into a web page, handle it as you would any other raw HTML from the same source (for example, sanitize it first when the XML is untrusted).
176
176
 
177
177
  ---
178
178
 
@@ -228,11 +228,15 @@ Every node in the tree has the following fields:
228
228
 
229
229
  `scaffold` never throws. No matter what the input looks like, it always returns a complete tree. Malformed structures are flagged with `malformed: true` in place and the walk continues.
230
230
 
231
- Four cases are handled:
231
+ Eight cases are handled:
232
232
 
233
233
  - **Unclosed tags** - opens but never closes, gets `malformed: true`, children are still collected
234
234
  - **Stray closing tags** - a `</tag>` with no matching open surfaces as a `closeTag` token with `malformed: true`
235
235
  - **Unclosed brackets** - a `<` with no matching `>` captures the remainder as a malformed token
236
+ - **Malformed attributes** - unquoted values (`<tag attr=unquoted>`), invalid separators (`<tag a="1"b="2">`), trailing junk after valid attributes (`<tag a="1" junk>`), and unclosed quoted values all flag the node `malformed: true`; any valid attributes parsed before the error are preserved
237
+ - **Unclosed processing instructions** - `<?xml ...` with no `?>` captures the remainder as a malformed token
238
+ - **Unclosed comments** - `<!-- ...` with no `-->` captures the remainder as a malformed token
239
+ - **Unclosed CDATA** - `<![CDATA[ ...` with no `]]>` captures the remainder as a malformed token
236
240
  - **Excessive nesting** - documents nested beyond 500 levels have the deepest open tag flagged `malformed: true` to prevent a stack overflow
237
241
 
238
242
  ```js
package/dist/index.cjs CHANGED
@@ -30,7 +30,71 @@ module.exports = __toCommonJS(src_exports);
30
30
 
31
31
  // src/modules/minify/minify.ts
32
32
  function minify(xml) {
33
- return xml.replace(/>(\s+)</g, (_, gap) => gap.trim() === "" ? "><" : `>${gap}<`).trim();
33
+ const input = xml.trim();
34
+ if (input === "") return "";
35
+ const tokens = tokenize(input);
36
+ return tokens.filter((token, index) => !isRemovableWhitespace(token, tokens, index)).map((token) => token.value).join("");
37
+ }
38
+ function isRemovableWhitespace(token, tokens, index) {
39
+ if (token.type !== "text") return false;
40
+ if (!/^\s+$/.test(token.value)) return false;
41
+ if (!token.value.includes("\n") && !token.value.includes("\r")) return false;
42
+ const previous = tokens[index - 1];
43
+ const next = tokens[index + 1];
44
+ return previous?.type === "markup" && next?.type === "markup";
45
+ }
46
+ function tokenize(xml) {
47
+ const tokens = [];
48
+ let position = 0;
49
+ while (position < xml.length) {
50
+ if (xml[position] !== "<") {
51
+ const nextMarkup = xml.indexOf("<", position);
52
+ const end2 = nextMarkup === -1 ? xml.length : nextMarkup;
53
+ tokens.push({ type: "text", value: xml.slice(position, end2) });
54
+ position = end2;
55
+ continue;
56
+ }
57
+ const end = findMarkupEnd(xml, position);
58
+ tokens.push({ type: "markup", value: xml.slice(position, end) });
59
+ position = end;
60
+ }
61
+ return tokens;
62
+ }
63
+ function findMarkupEnd(xml, start) {
64
+ if (xml.startsWith("<!--", start)) {
65
+ const end = xml.indexOf("-->", start + 4);
66
+ return end === -1 ? xml.length : end + 3;
67
+ }
68
+ if (xml.startsWith("<![CDATA[", start)) {
69
+ const end = xml.indexOf("]]>", start + 9);
70
+ return end === -1 ? xml.length : end + 3;
71
+ }
72
+ if (xml.startsWith("<?", start)) {
73
+ const end = xml.indexOf("?>", start + 2);
74
+ return end === -1 ? xml.length : end + 2;
75
+ }
76
+ if (xml.startsWith("<!DOCTYPE", start)) {
77
+ const bracketOpen = xml.indexOf("[", start);
78
+ const firstClose = xml.indexOf(">", start);
79
+ if (bracketOpen !== -1 && bracketOpen < firstClose) {
80
+ const bracketClose = xml.indexOf("]>", bracketOpen + 1);
81
+ return bracketClose === -1 ? xml.length : bracketClose + 2;
82
+ }
83
+ return firstClose === -1 ? xml.length : firstClose + 1;
84
+ }
85
+ let i = start + 1;
86
+ while (i < xml.length) {
87
+ const ch = xml[i];
88
+ if (ch === '"' || ch === "'") {
89
+ const closeQuote = xml.indexOf(ch, i + 1);
90
+ if (closeQuote === -1) return xml.length;
91
+ i = closeQuote + 1;
92
+ continue;
93
+ }
94
+ if (ch === ">") return i + 1;
95
+ i++;
96
+ }
97
+ return xml.length;
34
98
  }
35
99
 
36
100
  // src/modules/render/render.ts
@@ -61,28 +125,55 @@ function buildDataAttrs(node) {
61
125
  function parseXmlAttributes(xmlInner) {
62
126
  const attributes = [];
63
127
  let i = 0;
128
+ let malformed = false;
64
129
  const s = xmlInner.trim();
65
130
  while (i < s.length) {
131
+ const whitespaceStart = i;
66
132
  while (i < s.length && /\s/.test(s[i])) i++;
133
+ const hasSeparatorWhitespace = i > whitespaceStart;
67
134
  if (i >= s.length) break;
135
+ if (attributes.length > 0 && !hasSeparatorWhitespace) {
136
+ malformed = true;
137
+ break;
138
+ }
68
139
  const nameStart = i;
69
140
  while (i < s.length && s[i] !== "=" && !/\s/.test(s[i])) i++;
70
141
  const name = s.slice(nameStart, i).trim();
71
- if (!name) break;
142
+ if (!name) {
143
+ malformed = true;
144
+ break;
145
+ }
72
146
  while (i < s.length && /\s/.test(s[i])) i++;
73
- if (s[i] !== "=") break;
147
+ if (s[i] !== "=") {
148
+ malformed = true;
149
+ break;
150
+ }
74
151
  i++;
75
152
  while (i < s.length && /\s/.test(s[i])) i++;
76
153
  const quote = s[i];
77
- if (quote !== '"' && quote !== "'") break;
154
+ if (quote !== '"' && quote !== "'") {
155
+ return {
156
+ attributes: attributes.length > 0 ? attributes : void 0,
157
+ malformed: true
158
+ };
159
+ }
78
160
  i++;
79
161
  const valueStart = i;
80
162
  while (i < s.length && s[i] !== quote) i++;
163
+ if (i >= s.length) {
164
+ return {
165
+ attributes: attributes.length > 0 ? attributes : void 0,
166
+ malformed: true
167
+ };
168
+ }
81
169
  const value = s.slice(valueStart, i);
82
170
  i++;
83
171
  attributes.push({ name, value });
84
172
  }
85
- return attributes.length > 0 ? attributes : void 0;
173
+ return {
174
+ attributes: attributes.length > 0 ? attributes : void 0,
175
+ malformed
176
+ };
86
177
  }
87
178
  var MAX_DEPTH = 500;
88
179
  function scaffold(xml) {
@@ -185,7 +276,8 @@ function extractXmlNodes(xml, position) {
185
276
  raw: xml.slice(position),
186
277
  role: "processingInstruction",
187
278
  tag: "",
188
- end: xml.length
279
+ end: xml.length,
280
+ malformed: true
189
281
  } : {
190
282
  raw: xml.slice(position, end2 + 2),
191
283
  role: "processingInstruction",
@@ -195,7 +287,13 @@ function extractXmlNodes(xml, position) {
195
287
  }
196
288
  if (xml[position + 1] === "!" && xml[position + 2] === "[") {
197
289
  const end2 = xml.indexOf("]]>", position + 3);
198
- return end2 === -1 ? { raw: xml.slice(position), role: "textLeaf", tag: "", end: xml.length } : {
290
+ return end2 === -1 ? {
291
+ raw: xml.slice(position),
292
+ role: "textLeaf",
293
+ tag: "",
294
+ end: xml.length,
295
+ malformed: true
296
+ } : {
199
297
  raw: xml.slice(position, end2 + 3),
200
298
  role: "textLeaf",
201
299
  tag: "",
@@ -204,7 +302,13 @@ function extractXmlNodes(xml, position) {
204
302
  }
205
303
  if (xml[position + 1] === "!" && xml[position + 2] === "-" && xml[position + 3] === "-") {
206
304
  const end2 = xml.indexOf("-->", position + 4);
207
- return end2 === -1 ? { raw: xml.slice(position), role: "comment", tag: "", end: xml.length } : {
305
+ return end2 === -1 ? {
306
+ raw: xml.slice(position),
307
+ role: "comment",
308
+ tag: "",
309
+ end: xml.length,
310
+ malformed: true
311
+ } : {
208
312
  raw: xml.slice(position, end2 + 3),
209
313
  role: "comment",
210
314
  tag: "",
@@ -244,13 +348,29 @@ function extractXmlNodes(xml, position) {
244
348
  const trimmed = inner.slice(0, -1).trim();
245
349
  const tag2 = trimmed.split(/\s/)[0] ?? "";
246
350
  const xmlInner2 = trimmed.slice(tag2.length).trim() || void 0;
247
- const xmlAttributes2 = xmlInner2 ? parseXmlAttributes(xmlInner2) : void 0;
248
- return { raw, role: "selfTag", tag: tag2, xmlInner: xmlInner2, xmlAttributes: xmlAttributes2, end };
351
+ const parsed2 = xmlInner2 ? parseXmlAttributes(xmlInner2) : void 0;
352
+ return {
353
+ raw,
354
+ role: "selfTag",
355
+ tag: tag2,
356
+ xmlInner: xmlInner2,
357
+ xmlAttributes: parsed2?.attributes,
358
+ end,
359
+ malformed: parsed2?.malformed ? true : void 0
360
+ };
249
361
  }
250
362
  const tag = inner.split(/\s/)[0] ?? "";
251
363
  const xmlInner = inner.slice(tag.length).trim() || void 0;
252
- const xmlAttributes = xmlInner ? parseXmlAttributes(xmlInner) : void 0;
253
- return { raw, role: "openTag", tag, xmlInner, xmlAttributes, end };
364
+ const parsed = xmlInner ? parseXmlAttributes(xmlInner) : void 0;
365
+ return {
366
+ raw,
367
+ role: "openTag",
368
+ tag,
369
+ xmlInner,
370
+ xmlAttributes: parsed?.attributes,
371
+ end,
372
+ malformed: parsed?.malformed ? true : void 0
373
+ };
254
374
  }
255
375
 
256
376
  // src/modules/scaffold/types.ts
package/dist/index.js CHANGED
@@ -1,6 +1,70 @@
1
1
  // src/modules/minify/minify.ts
2
2
  function minify(xml) {
3
- return xml.replace(/>(\s+)</g, (_, gap) => gap.trim() === "" ? "><" : `>${gap}<`).trim();
3
+ const input = xml.trim();
4
+ if (input === "") return "";
5
+ const tokens = tokenize(input);
6
+ return tokens.filter((token, index) => !isRemovableWhitespace(token, tokens, index)).map((token) => token.value).join("");
7
+ }
8
+ function isRemovableWhitespace(token, tokens, index) {
9
+ if (token.type !== "text") return false;
10
+ if (!/^\s+$/.test(token.value)) return false;
11
+ if (!token.value.includes("\n") && !token.value.includes("\r")) return false;
12
+ const previous = tokens[index - 1];
13
+ const next = tokens[index + 1];
14
+ return previous?.type === "markup" && next?.type === "markup";
15
+ }
16
+ function tokenize(xml) {
17
+ const tokens = [];
18
+ let position = 0;
19
+ while (position < xml.length) {
20
+ if (xml[position] !== "<") {
21
+ const nextMarkup = xml.indexOf("<", position);
22
+ const end2 = nextMarkup === -1 ? xml.length : nextMarkup;
23
+ tokens.push({ type: "text", value: xml.slice(position, end2) });
24
+ position = end2;
25
+ continue;
26
+ }
27
+ const end = findMarkupEnd(xml, position);
28
+ tokens.push({ type: "markup", value: xml.slice(position, end) });
29
+ position = end;
30
+ }
31
+ return tokens;
32
+ }
33
+ function findMarkupEnd(xml, start) {
34
+ if (xml.startsWith("<!--", start)) {
35
+ const end = xml.indexOf("-->", start + 4);
36
+ return end === -1 ? xml.length : end + 3;
37
+ }
38
+ if (xml.startsWith("<![CDATA[", start)) {
39
+ const end = xml.indexOf("]]>", start + 9);
40
+ return end === -1 ? xml.length : end + 3;
41
+ }
42
+ if (xml.startsWith("<?", start)) {
43
+ const end = xml.indexOf("?>", start + 2);
44
+ return end === -1 ? xml.length : end + 2;
45
+ }
46
+ if (xml.startsWith("<!DOCTYPE", start)) {
47
+ const bracketOpen = xml.indexOf("[", start);
48
+ const firstClose = xml.indexOf(">", start);
49
+ if (bracketOpen !== -1 && bracketOpen < firstClose) {
50
+ const bracketClose = xml.indexOf("]>", bracketOpen + 1);
51
+ return bracketClose === -1 ? xml.length : bracketClose + 2;
52
+ }
53
+ return firstClose === -1 ? xml.length : firstClose + 1;
54
+ }
55
+ let i = start + 1;
56
+ while (i < xml.length) {
57
+ const ch = xml[i];
58
+ if (ch === '"' || ch === "'") {
59
+ const closeQuote = xml.indexOf(ch, i + 1);
60
+ if (closeQuote === -1) return xml.length;
61
+ i = closeQuote + 1;
62
+ continue;
63
+ }
64
+ if (ch === ">") return i + 1;
65
+ i++;
66
+ }
67
+ return xml.length;
4
68
  }
5
69
 
6
70
  // src/modules/render/render.ts
@@ -31,28 +95,55 @@ function buildDataAttrs(node) {
31
95
  function parseXmlAttributes(xmlInner) {
32
96
  const attributes = [];
33
97
  let i = 0;
98
+ let malformed = false;
34
99
  const s = xmlInner.trim();
35
100
  while (i < s.length) {
101
+ const whitespaceStart = i;
36
102
  while (i < s.length && /\s/.test(s[i])) i++;
103
+ const hasSeparatorWhitespace = i > whitespaceStart;
37
104
  if (i >= s.length) break;
105
+ if (attributes.length > 0 && !hasSeparatorWhitespace) {
106
+ malformed = true;
107
+ break;
108
+ }
38
109
  const nameStart = i;
39
110
  while (i < s.length && s[i] !== "=" && !/\s/.test(s[i])) i++;
40
111
  const name = s.slice(nameStart, i).trim();
41
- if (!name) break;
112
+ if (!name) {
113
+ malformed = true;
114
+ break;
115
+ }
42
116
  while (i < s.length && /\s/.test(s[i])) i++;
43
- if (s[i] !== "=") break;
117
+ if (s[i] !== "=") {
118
+ malformed = true;
119
+ break;
120
+ }
44
121
  i++;
45
122
  while (i < s.length && /\s/.test(s[i])) i++;
46
123
  const quote = s[i];
47
- if (quote !== '"' && quote !== "'") break;
124
+ if (quote !== '"' && quote !== "'") {
125
+ return {
126
+ attributes: attributes.length > 0 ? attributes : void 0,
127
+ malformed: true
128
+ };
129
+ }
48
130
  i++;
49
131
  const valueStart = i;
50
132
  while (i < s.length && s[i] !== quote) i++;
133
+ if (i >= s.length) {
134
+ return {
135
+ attributes: attributes.length > 0 ? attributes : void 0,
136
+ malformed: true
137
+ };
138
+ }
51
139
  const value = s.slice(valueStart, i);
52
140
  i++;
53
141
  attributes.push({ name, value });
54
142
  }
55
- return attributes.length > 0 ? attributes : void 0;
143
+ return {
144
+ attributes: attributes.length > 0 ? attributes : void 0,
145
+ malformed
146
+ };
56
147
  }
57
148
  var MAX_DEPTH = 500;
58
149
  function scaffold(xml) {
@@ -155,7 +246,8 @@ function extractXmlNodes(xml, position) {
155
246
  raw: xml.slice(position),
156
247
  role: "processingInstruction",
157
248
  tag: "",
158
- end: xml.length
249
+ end: xml.length,
250
+ malformed: true
159
251
  } : {
160
252
  raw: xml.slice(position, end2 + 2),
161
253
  role: "processingInstruction",
@@ -165,7 +257,13 @@ function extractXmlNodes(xml, position) {
165
257
  }
166
258
  if (xml[position + 1] === "!" && xml[position + 2] === "[") {
167
259
  const end2 = xml.indexOf("]]>", position + 3);
168
- return end2 === -1 ? { raw: xml.slice(position), role: "textLeaf", tag: "", end: xml.length } : {
260
+ return end2 === -1 ? {
261
+ raw: xml.slice(position),
262
+ role: "textLeaf",
263
+ tag: "",
264
+ end: xml.length,
265
+ malformed: true
266
+ } : {
169
267
  raw: xml.slice(position, end2 + 3),
170
268
  role: "textLeaf",
171
269
  tag: "",
@@ -174,7 +272,13 @@ function extractXmlNodes(xml, position) {
174
272
  }
175
273
  if (xml[position + 1] === "!" && xml[position + 2] === "-" && xml[position + 3] === "-") {
176
274
  const end2 = xml.indexOf("-->", position + 4);
177
- return end2 === -1 ? { raw: xml.slice(position), role: "comment", tag: "", end: xml.length } : {
275
+ return end2 === -1 ? {
276
+ raw: xml.slice(position),
277
+ role: "comment",
278
+ tag: "",
279
+ end: xml.length,
280
+ malformed: true
281
+ } : {
178
282
  raw: xml.slice(position, end2 + 3),
179
283
  role: "comment",
180
284
  tag: "",
@@ -214,13 +318,29 @@ function extractXmlNodes(xml, position) {
214
318
  const trimmed = inner.slice(0, -1).trim();
215
319
  const tag2 = trimmed.split(/\s/)[0] ?? "";
216
320
  const xmlInner2 = trimmed.slice(tag2.length).trim() || void 0;
217
- const xmlAttributes2 = xmlInner2 ? parseXmlAttributes(xmlInner2) : void 0;
218
- return { raw, role: "selfTag", tag: tag2, xmlInner: xmlInner2, xmlAttributes: xmlAttributes2, end };
321
+ const parsed2 = xmlInner2 ? parseXmlAttributes(xmlInner2) : void 0;
322
+ return {
323
+ raw,
324
+ role: "selfTag",
325
+ tag: tag2,
326
+ xmlInner: xmlInner2,
327
+ xmlAttributes: parsed2?.attributes,
328
+ end,
329
+ malformed: parsed2?.malformed ? true : void 0
330
+ };
219
331
  }
220
332
  const tag = inner.split(/\s/)[0] ?? "";
221
333
  const xmlInner = inner.slice(tag.length).trim() || void 0;
222
- const xmlAttributes = xmlInner ? parseXmlAttributes(xmlInner) : void 0;
223
- return { raw, role: "openTag", tag, xmlInner, xmlAttributes, end };
334
+ const parsed = xmlInner ? parseXmlAttributes(xmlInner) : void 0;
335
+ return {
336
+ raw,
337
+ role: "openTag",
338
+ tag,
339
+ xmlInner,
340
+ xmlAttributes: parsed?.attributes,
341
+ end,
342
+ malformed: parsed?.malformed ? true : void 0
343
+ };
224
344
  }
225
345
 
226
346
  // src/modules/scaffold/types.ts
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "xml-to-html-converter",
3
- "version": "0.4.0",
3
+ "version": "0.4.2",
4
4
  "description": "Zero dependency XML to HTML converter for Node environments",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",