xml-to-html-converter 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/dist/index.cjs +119 -8
- package/dist/index.js +119 -8
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
A zero-dependency Node.js package for converting XML to HTML.
|
|
10
10
|
|
|
11
|
-
- **`minify(xml)`**
|
|
11
|
+
- **`minify(xml)`** removes whitespace-only text that contains a line break and sits between two markup tokens, before parsing. Non-whitespace text content and CDATA are left untouched.
|
|
12
12
|
- **`scaffold(xml)`** reads any XML string and returns a nested node tree
|
|
13
13
|
- **`walk(nodes, visitor)`** traverses the full node tree depth-first, visiting every node
|
|
14
14
|
- **`render(nodes)`** converts a node tree to an HTML string. Every XML element becomes a `<div>` with `data-tag` and `data-attrs-*` attributes
|
|
@@ -27,7 +27,7 @@ npm install xml-to-html-converter
|
|
|
27
27
|
|
|
28
28
|
### minify
|
|
29
29
|
|
|
30
|
-
When your XML comes from a file or an API it is usually indented and line-broken. `minify`
|
|
30
|
+
When your XML comes from a file or an API it is usually indented and line-broken. `minify` removes whitespace-only text nodes that include line breaks when they appear between markup tokens. Text content and CDATA are left completely untouched.
|
|
31
31
|
|
|
32
32
|
```js
|
|
33
33
|
import { minify } from "xml-to-html-converter";
|
|
@@ -42,7 +42,7 @@ const clean = minify(`
|
|
|
42
42
|
// <bookstore><book category="cooking"><title lang="en">Everyday Italian</title></book></bookstore>
|
|
43
43
|
```
|
|
44
44
|
|
|
45
|
-
`minify` is opt-in. Skip it if whitespace
|
|
45
|
+
`minify` is opt-in. Skip it if whitespace-only nodes between markup tokens are meaningful to your use case.
|
|
46
46
|
|
|
47
47
|
---
|
|
48
48
|
|
|
@@ -233,7 +233,7 @@ Eight cases are handled:
|
|
|
233
233
|
- **Unclosed tags** - opens but never closes, gets `malformed: true`, children are still collected
|
|
234
234
|
- **Stray closing tags** - a `</tag>` with no matching open surfaces as a `closeTag` token with `malformed: true`
|
|
235
235
|
- **Unclosed brackets** - a `<` with no matching `>` captures the remainder as a malformed token
|
|
236
|
-
- **
|
|
236
|
+
- **Malformed attributes** - unquoted values (`<tag attr=unquoted>`), invalid separators (`<tag a="1"b="2">`), trailing junk after valid attributes (`<tag a="1" junk>`), and unclosed quoted values all mark the node with `malformed: true`; any valid attributes parsed before the error are preserved
|
|
237
237
|
- **Unclosed processing instructions** - `<?xml ...` with no `?>` captures the remainder as a malformed token
|
|
238
238
|
- **Unclosed comments** - `<!-- ...` with no `-->` captures the remainder as a malformed token
|
|
239
239
|
- **Unclosed CDATA** - `<![CDATA[ ...` with no `]]>` captures the remainder as a malformed token
|
package/dist/index.cjs
CHANGED
|
@@ -30,7 +30,71 @@ module.exports = __toCommonJS(src_exports);
|
|
|
30
30
|
|
|
31
31
|
// src/modules/minify/minify.ts
|
|
32
32
|
/**
 * Removes prettification whitespace from an XML string.
 *
 * The input is trimmed, split into markup and text tokens, and every
 * whitespace-only text token that contains a line break and sits directly
 * between two markup tokens is dropped. All other tokens (text with content,
 * single-line spacing, CDATA, comments, tags) are emitted unchanged.
 *
 * @param {string} xml - Raw XML source.
 * @returns {string} The XML with removable whitespace stripped; "" when the
 *   input is empty or whitespace-only.
 */
function minify(xml) {
  const input = xml.trim();
  if (input === "") return "";
  const tokens = tokenize(input);
  return tokens
    .filter((token, index) => !isRemovableWhitespace(token, tokens, index))
    .map((token) => token.value)
    .join("");
}

/**
 * Decides whether a token is indentation/line-break filler that can be dropped.
 *
 * @param {{type: string, value: string}} token - Token under test.
 * @param {Array<{type: string, value: string}>} tokens - Full token list.
 * @param {number} index - Position of `token` inside `tokens`.
 * @returns {boolean} True only for whitespace-only text that contains a line
 *   break and has a markup token on both sides.
 */
function isRemovableWhitespace(token, tokens, index) {
  if (token.type !== "text") return false;
  if (!/^\s+$/.test(token.value)) return false;
  // Same-line spacing (no CR/LF) may be meaningful, so it is kept.
  if (!/[\r\n]/.test(token.value)) return false;
  return tokens[index - 1]?.type === "markup" && tokens[index + 1]?.type === "markup";
}

/**
 * Splits XML into alternating "markup" and "text" tokens.
 *
 * @param {string} xml - XML source.
 * @returns {Array<{type: "markup" | "text", value: string}>} Tokens whose
 *   values concatenate back to `xml`.
 */
function tokenize(xml) {
  const tokens = [];
  let position = 0;
  while (position < xml.length) {
    if (xml[position] !== "<") {
      // Text runs up to the next "<" or to the end of the input.
      const nextMarkup = xml.indexOf("<", position);
      const textEnd = nextMarkup === -1 ? xml.length : nextMarkup;
      tokens.push({ type: "text", value: xml.slice(position, textEnd) });
      position = textEnd;
      continue;
    }
    const end = findMarkupEnd(xml, position);
    tokens.push({ type: "markup", value: xml.slice(position, end) });
    position = end;
  }
  return tokens;
}

/**
 * Finds the index just past the markup construct that starts at `start`.
 *
 * Comments, CDATA sections and processing instructions end at their own
 * terminators. Tags and DOCTYPE declarations end at the first ">" that is
 * outside a quoted value. For a DOCTYPE, an internal subset ("[" ... "]") is
 * skipped as a unit, including any quoted literals, comments and processing
 * instructions inside it, and whitespace is allowed between "]" and ">".
 *
 * @param {string} xml - XML source.
 * @param {number} start - Index of the opening "<".
 * @returns {number} Exclusive end index; `xml.length` when the construct is
 *   never terminated.
 */
function findMarkupEnd(xml, start) {
  if (xml.startsWith("<!--", start)) {
    const end = xml.indexOf("-->", start + 4);
    return end === -1 ? xml.length : end + 3;
  }
  if (xml.startsWith("<![CDATA[", start)) {
    const end = xml.indexOf("]]>", start + 9);
    return end === -1 ? xml.length : end + 3;
  }
  if (xml.startsWith("<?", start)) {
    const end = xml.indexOf("?>", start + 2);
    return end === -1 ? xml.length : end + 2;
  }
  const isDoctype = xml.startsWith("<!DOCTYPE", start);
  let inSubset = false;
  let i = start + 1;
  while (i < xml.length) {
    const ch = xml[i];
    if (ch === '"' || ch === "'") {
      // Quoted values may contain ">", "[" or "]"; skip them whole.
      const closeQuote = xml.indexOf(ch, i + 1);
      if (closeQuote === -1) return xml.length;
      i = closeQuote + 1;
      continue;
    }
    if (inSubset) {
      // Comments and PIs inside the subset can hold stray quotes or brackets.
      if (xml.startsWith("<!--", i)) {
        const commentEnd = xml.indexOf("-->", i + 4);
        if (commentEnd === -1) return xml.length;
        i = commentEnd + 3;
        continue;
      }
      if (xml.startsWith("<?", i)) {
        const piEnd = xml.indexOf("?>", i + 2);
        if (piEnd === -1) return xml.length;
        i = piEnd + 2;
        continue;
      }
      if (ch === "]") inSubset = false;
    } else if (isDoctype && ch === "[") {
      inSubset = true;
    } else if (ch === ">") {
      return i + 1;
    }
    i++;
  }
  return xml.length;
}
|
|
35
99
|
|
|
36
100
|
// src/modules/render/render.ts
|
|
@@ -61,16 +125,29 @@ function buildDataAttrs(node) {
|
|
|
61
125
|
function parseXmlAttributes(xmlInner) {
|
|
62
126
|
const attributes = [];
|
|
63
127
|
let i = 0;
|
|
128
|
+
let malformed = false;
|
|
64
129
|
const s = xmlInner.trim();
|
|
65
130
|
while (i < s.length) {
|
|
131
|
+
const whitespaceStart = i;
|
|
66
132
|
while (i < s.length && /\s/.test(s[i])) i++;
|
|
133
|
+
const hasSeparatorWhitespace = i > whitespaceStart;
|
|
67
134
|
if (i >= s.length) break;
|
|
135
|
+
if (attributes.length > 0 && !hasSeparatorWhitespace) {
|
|
136
|
+
malformed = true;
|
|
137
|
+
break;
|
|
138
|
+
}
|
|
68
139
|
const nameStart = i;
|
|
69
140
|
while (i < s.length && s[i] !== "=" && !/\s/.test(s[i])) i++;
|
|
70
141
|
const name = s.slice(nameStart, i).trim();
|
|
71
|
-
if (!name)
|
|
142
|
+
if (!name) {
|
|
143
|
+
malformed = true;
|
|
144
|
+
break;
|
|
145
|
+
}
|
|
72
146
|
while (i < s.length && /\s/.test(s[i])) i++;
|
|
73
|
-
if (s[i] !== "=")
|
|
147
|
+
if (s[i] !== "=") {
|
|
148
|
+
malformed = true;
|
|
149
|
+
break;
|
|
150
|
+
}
|
|
74
151
|
i++;
|
|
75
152
|
while (i < s.length && /\s/.test(s[i])) i++;
|
|
76
153
|
const quote = s[i];
|
|
@@ -83,13 +160,19 @@ function parseXmlAttributes(xmlInner) {
|
|
|
83
160
|
i++;
|
|
84
161
|
const valueStart = i;
|
|
85
162
|
while (i < s.length && s[i] !== quote) i++;
|
|
163
|
+
if (i >= s.length) {
|
|
164
|
+
return {
|
|
165
|
+
attributes: attributes.length > 0 ? attributes : void 0,
|
|
166
|
+
malformed: true
|
|
167
|
+
};
|
|
168
|
+
}
|
|
86
169
|
const value = s.slice(valueStart, i);
|
|
87
170
|
i++;
|
|
88
171
|
attributes.push({ name, value });
|
|
89
172
|
}
|
|
90
173
|
return {
|
|
91
174
|
attributes: attributes.length > 0 ? attributes : void 0,
|
|
92
|
-
malformed
|
|
175
|
+
malformed
|
|
93
176
|
};
|
|
94
177
|
}
|
|
95
178
|
var MAX_DEPTH = 500;
|
|
@@ -204,7 +287,13 @@ function extractXmlNodes(xml, position) {
|
|
|
204
287
|
}
|
|
205
288
|
if (xml[position + 1] === "!" && xml[position + 2] === "[") {
|
|
206
289
|
const end2 = xml.indexOf("]]>", position + 3);
|
|
207
|
-
return end2 === -1 ? {
|
|
290
|
+
return end2 === -1 ? {
|
|
291
|
+
raw: xml.slice(position),
|
|
292
|
+
role: "textLeaf",
|
|
293
|
+
tag: "",
|
|
294
|
+
end: xml.length,
|
|
295
|
+
malformed: true
|
|
296
|
+
} : {
|
|
208
297
|
raw: xml.slice(position, end2 + 3),
|
|
209
298
|
role: "textLeaf",
|
|
210
299
|
tag: "",
|
|
@@ -213,7 +302,13 @@ function extractXmlNodes(xml, position) {
|
|
|
213
302
|
}
|
|
214
303
|
if (xml[position + 1] === "!" && xml[position + 2] === "-" && xml[position + 3] === "-") {
|
|
215
304
|
const end2 = xml.indexOf("-->", position + 4);
|
|
216
|
-
return end2 === -1 ? {
|
|
305
|
+
return end2 === -1 ? {
|
|
306
|
+
raw: xml.slice(position),
|
|
307
|
+
role: "comment",
|
|
308
|
+
tag: "",
|
|
309
|
+
end: xml.length,
|
|
310
|
+
malformed: true
|
|
311
|
+
} : {
|
|
217
312
|
raw: xml.slice(position, end2 + 3),
|
|
218
313
|
role: "comment",
|
|
219
314
|
tag: "",
|
|
@@ -254,12 +349,28 @@ function extractXmlNodes(xml, position) {
|
|
|
254
349
|
const tag2 = trimmed.split(/\s/)[0] ?? "";
|
|
255
350
|
const xmlInner2 = trimmed.slice(tag2.length).trim() || void 0;
|
|
256
351
|
const parsed2 = xmlInner2 ? parseXmlAttributes(xmlInner2) : void 0;
|
|
257
|
-
return {
|
|
352
|
+
return {
|
|
353
|
+
raw,
|
|
354
|
+
role: "selfTag",
|
|
355
|
+
tag: tag2,
|
|
356
|
+
xmlInner: xmlInner2,
|
|
357
|
+
xmlAttributes: parsed2?.attributes,
|
|
358
|
+
end,
|
|
359
|
+
malformed: parsed2?.malformed ? true : void 0
|
|
360
|
+
};
|
|
258
361
|
}
|
|
259
362
|
const tag = inner.split(/\s/)[0] ?? "";
|
|
260
363
|
const xmlInner = inner.slice(tag.length).trim() || void 0;
|
|
261
364
|
const parsed = xmlInner ? parseXmlAttributes(xmlInner) : void 0;
|
|
262
|
-
return {
|
|
365
|
+
return {
|
|
366
|
+
raw,
|
|
367
|
+
role: "openTag",
|
|
368
|
+
tag,
|
|
369
|
+
xmlInner,
|
|
370
|
+
xmlAttributes: parsed?.attributes,
|
|
371
|
+
end,
|
|
372
|
+
malformed: parsed?.malformed ? true : void 0
|
|
373
|
+
};
|
|
263
374
|
}
|
|
264
375
|
|
|
265
376
|
// src/modules/scaffold/types.ts
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,70 @@
|
|
|
1
1
|
// src/modules/minify/minify.ts
|
|
2
2
|
/**
 * Removes prettification whitespace from an XML string.
 *
 * The input is trimmed, split into markup and text tokens, and every
 * whitespace-only text token that contains a line break and sits directly
 * between two markup tokens is dropped. All other tokens (text with content,
 * single-line spacing, CDATA, comments, tags) are emitted unchanged.
 *
 * @param {string} xml - Raw XML source.
 * @returns {string} The XML with removable whitespace stripped; "" when the
 *   input is empty or whitespace-only.
 */
function minify(xml) {
  const input = xml.trim();
  if (input === "") return "";
  const tokens = tokenize(input);
  return tokens
    .filter((token, index) => !isRemovableWhitespace(token, tokens, index))
    .map((token) => token.value)
    .join("");
}

/**
 * Decides whether a token is indentation/line-break filler that can be dropped.
 *
 * @param {{type: string, value: string}} token - Token under test.
 * @param {Array<{type: string, value: string}>} tokens - Full token list.
 * @param {number} index - Position of `token` inside `tokens`.
 * @returns {boolean} True only for whitespace-only text that contains a line
 *   break and has a markup token on both sides.
 */
function isRemovableWhitespace(token, tokens, index) {
  if (token.type !== "text") return false;
  if (!/^\s+$/.test(token.value)) return false;
  // Same-line spacing (no CR/LF) may be meaningful, so it is kept.
  if (!/[\r\n]/.test(token.value)) return false;
  return tokens[index - 1]?.type === "markup" && tokens[index + 1]?.type === "markup";
}

/**
 * Splits XML into alternating "markup" and "text" tokens.
 *
 * @param {string} xml - XML source.
 * @returns {Array<{type: "markup" | "text", value: string}>} Tokens whose
 *   values concatenate back to `xml`.
 */
function tokenize(xml) {
  const tokens = [];
  let position = 0;
  while (position < xml.length) {
    if (xml[position] !== "<") {
      // Text runs up to the next "<" or to the end of the input.
      const nextMarkup = xml.indexOf("<", position);
      const textEnd = nextMarkup === -1 ? xml.length : nextMarkup;
      tokens.push({ type: "text", value: xml.slice(position, textEnd) });
      position = textEnd;
      continue;
    }
    const end = findMarkupEnd(xml, position);
    tokens.push({ type: "markup", value: xml.slice(position, end) });
    position = end;
  }
  return tokens;
}

/**
 * Finds the index just past the markup construct that starts at `start`.
 *
 * Comments, CDATA sections and processing instructions end at their own
 * terminators. Tags and DOCTYPE declarations end at the first ">" that is
 * outside a quoted value. For a DOCTYPE, an internal subset ("[" ... "]") is
 * skipped as a unit, including any quoted literals, comments and processing
 * instructions inside it, and whitespace is allowed between "]" and ">".
 *
 * @param {string} xml - XML source.
 * @param {number} start - Index of the opening "<".
 * @returns {number} Exclusive end index; `xml.length` when the construct is
 *   never terminated.
 */
function findMarkupEnd(xml, start) {
  if (xml.startsWith("<!--", start)) {
    const end = xml.indexOf("-->", start + 4);
    return end === -1 ? xml.length : end + 3;
  }
  if (xml.startsWith("<![CDATA[", start)) {
    const end = xml.indexOf("]]>", start + 9);
    return end === -1 ? xml.length : end + 3;
  }
  if (xml.startsWith("<?", start)) {
    const end = xml.indexOf("?>", start + 2);
    return end === -1 ? xml.length : end + 2;
  }
  const isDoctype = xml.startsWith("<!DOCTYPE", start);
  let inSubset = false;
  let i = start + 1;
  while (i < xml.length) {
    const ch = xml[i];
    if (ch === '"' || ch === "'") {
      // Quoted values may contain ">", "[" or "]"; skip them whole.
      const closeQuote = xml.indexOf(ch, i + 1);
      if (closeQuote === -1) return xml.length;
      i = closeQuote + 1;
      continue;
    }
    if (inSubset) {
      // Comments and PIs inside the subset can hold stray quotes or brackets.
      if (xml.startsWith("<!--", i)) {
        const commentEnd = xml.indexOf("-->", i + 4);
        if (commentEnd === -1) return xml.length;
        i = commentEnd + 3;
        continue;
      }
      if (xml.startsWith("<?", i)) {
        const piEnd = xml.indexOf("?>", i + 2);
        if (piEnd === -1) return xml.length;
        i = piEnd + 2;
        continue;
      }
      if (ch === "]") inSubset = false;
    } else if (isDoctype && ch === "[") {
      inSubset = true;
    } else if (ch === ">") {
      return i + 1;
    }
    i++;
  }
  return xml.length;
}
|
|
5
69
|
|
|
6
70
|
// src/modules/render/render.ts
|
|
@@ -31,16 +95,29 @@ function buildDataAttrs(node) {
|
|
|
31
95
|
function parseXmlAttributes(xmlInner) {
|
|
32
96
|
const attributes = [];
|
|
33
97
|
let i = 0;
|
|
98
|
+
let malformed = false;
|
|
34
99
|
const s = xmlInner.trim();
|
|
35
100
|
while (i < s.length) {
|
|
101
|
+
const whitespaceStart = i;
|
|
36
102
|
while (i < s.length && /\s/.test(s[i])) i++;
|
|
103
|
+
const hasSeparatorWhitespace = i > whitespaceStart;
|
|
37
104
|
if (i >= s.length) break;
|
|
105
|
+
if (attributes.length > 0 && !hasSeparatorWhitespace) {
|
|
106
|
+
malformed = true;
|
|
107
|
+
break;
|
|
108
|
+
}
|
|
38
109
|
const nameStart = i;
|
|
39
110
|
while (i < s.length && s[i] !== "=" && !/\s/.test(s[i])) i++;
|
|
40
111
|
const name = s.slice(nameStart, i).trim();
|
|
41
|
-
if (!name)
|
|
112
|
+
if (!name) {
|
|
113
|
+
malformed = true;
|
|
114
|
+
break;
|
|
115
|
+
}
|
|
42
116
|
while (i < s.length && /\s/.test(s[i])) i++;
|
|
43
|
-
if (s[i] !== "=")
|
|
117
|
+
if (s[i] !== "=") {
|
|
118
|
+
malformed = true;
|
|
119
|
+
break;
|
|
120
|
+
}
|
|
44
121
|
i++;
|
|
45
122
|
while (i < s.length && /\s/.test(s[i])) i++;
|
|
46
123
|
const quote = s[i];
|
|
@@ -53,13 +130,19 @@ function parseXmlAttributes(xmlInner) {
|
|
|
53
130
|
i++;
|
|
54
131
|
const valueStart = i;
|
|
55
132
|
while (i < s.length && s[i] !== quote) i++;
|
|
133
|
+
if (i >= s.length) {
|
|
134
|
+
return {
|
|
135
|
+
attributes: attributes.length > 0 ? attributes : void 0,
|
|
136
|
+
malformed: true
|
|
137
|
+
};
|
|
138
|
+
}
|
|
56
139
|
const value = s.slice(valueStart, i);
|
|
57
140
|
i++;
|
|
58
141
|
attributes.push({ name, value });
|
|
59
142
|
}
|
|
60
143
|
return {
|
|
61
144
|
attributes: attributes.length > 0 ? attributes : void 0,
|
|
62
|
-
malformed
|
|
145
|
+
malformed
|
|
63
146
|
};
|
|
64
147
|
}
|
|
65
148
|
var MAX_DEPTH = 500;
|
|
@@ -174,7 +257,13 @@ function extractXmlNodes(xml, position) {
|
|
|
174
257
|
}
|
|
175
258
|
if (xml[position + 1] === "!" && xml[position + 2] === "[") {
|
|
176
259
|
const end2 = xml.indexOf("]]>", position + 3);
|
|
177
|
-
return end2 === -1 ? {
|
|
260
|
+
return end2 === -1 ? {
|
|
261
|
+
raw: xml.slice(position),
|
|
262
|
+
role: "textLeaf",
|
|
263
|
+
tag: "",
|
|
264
|
+
end: xml.length,
|
|
265
|
+
malformed: true
|
|
266
|
+
} : {
|
|
178
267
|
raw: xml.slice(position, end2 + 3),
|
|
179
268
|
role: "textLeaf",
|
|
180
269
|
tag: "",
|
|
@@ -183,7 +272,13 @@ function extractXmlNodes(xml, position) {
|
|
|
183
272
|
}
|
|
184
273
|
if (xml[position + 1] === "!" && xml[position + 2] === "-" && xml[position + 3] === "-") {
|
|
185
274
|
const end2 = xml.indexOf("-->", position + 4);
|
|
186
|
-
return end2 === -1 ? {
|
|
275
|
+
return end2 === -1 ? {
|
|
276
|
+
raw: xml.slice(position),
|
|
277
|
+
role: "comment",
|
|
278
|
+
tag: "",
|
|
279
|
+
end: xml.length,
|
|
280
|
+
malformed: true
|
|
281
|
+
} : {
|
|
187
282
|
raw: xml.slice(position, end2 + 3),
|
|
188
283
|
role: "comment",
|
|
189
284
|
tag: "",
|
|
@@ -224,12 +319,28 @@ function extractXmlNodes(xml, position) {
|
|
|
224
319
|
const tag2 = trimmed.split(/\s/)[0] ?? "";
|
|
225
320
|
const xmlInner2 = trimmed.slice(tag2.length).trim() || void 0;
|
|
226
321
|
const parsed2 = xmlInner2 ? parseXmlAttributes(xmlInner2) : void 0;
|
|
227
|
-
return {
|
|
322
|
+
return {
|
|
323
|
+
raw,
|
|
324
|
+
role: "selfTag",
|
|
325
|
+
tag: tag2,
|
|
326
|
+
xmlInner: xmlInner2,
|
|
327
|
+
xmlAttributes: parsed2?.attributes,
|
|
328
|
+
end,
|
|
329
|
+
malformed: parsed2?.malformed ? true : void 0
|
|
330
|
+
};
|
|
228
331
|
}
|
|
229
332
|
const tag = inner.split(/\s/)[0] ?? "";
|
|
230
333
|
const xmlInner = inner.slice(tag.length).trim() || void 0;
|
|
231
334
|
const parsed = xmlInner ? parseXmlAttributes(xmlInner) : void 0;
|
|
232
|
-
return {
|
|
335
|
+
return {
|
|
336
|
+
raw,
|
|
337
|
+
role: "openTag",
|
|
338
|
+
tag,
|
|
339
|
+
xmlInner,
|
|
340
|
+
xmlAttributes: parsed?.attributes,
|
|
341
|
+
end,
|
|
342
|
+
malformed: parsed?.malformed ? true : void 0
|
|
343
|
+
};
|
|
233
344
|
}
|
|
234
345
|
|
|
235
346
|
// src/modules/scaffold/types.ts
|