@eksml/xml 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +588 -0
- package/dist/converters/fromLossless.d.mts +14 -0
- package/dist/converters/fromLossless.d.mts.map +1 -0
- package/dist/converters/fromLossless.mjs +35 -0
- package/dist/converters/fromLossless.mjs.map +1 -0
- package/dist/converters/fromLossy.d.mts +18 -0
- package/dist/converters/fromLossy.d.mts.map +1 -0
- package/dist/converters/fromLossy.mjs +91 -0
- package/dist/converters/fromLossy.mjs.map +1 -0
- package/dist/converters/lossless.d.mts +39 -0
- package/dist/converters/lossless.d.mts.map +1 -0
- package/dist/converters/lossless.mjs +74 -0
- package/dist/converters/lossless.mjs.map +1 -0
- package/dist/converters/lossy.d.mts +42 -0
- package/dist/converters/lossy.d.mts.map +1 -0
- package/dist/converters/lossy.mjs +158 -0
- package/dist/converters/lossy.mjs.map +1 -0
- package/dist/htmlConstants-D6fsKbZ-.mjs +30 -0
- package/dist/htmlConstants-D6fsKbZ-.mjs.map +1 -0
- package/dist/parser-BfdEfWDg.d.mts +95 -0
- package/dist/parser-BfdEfWDg.d.mts.map +1 -0
- package/dist/parser-CYq309aR.mjs +479 -0
- package/dist/parser-CYq309aR.mjs.map +1 -0
- package/dist/parser.d.mts +2 -0
- package/dist/parser.mjs +2 -0
- package/dist/sax.d.mts +64 -0
- package/dist/sax.d.mts.map +1 -0
- package/dist/sax.mjs +70 -0
- package/dist/sax.mjs.map +1 -0
- package/dist/saxEngine-BDnD7ruG.mjs +750 -0
- package/dist/saxEngine-BDnD7ruG.mjs.map +1 -0
- package/dist/utilities/index.d.mts +88 -0
- package/dist/utilities/index.d.mts.map +1 -0
- package/dist/utilities/index.mjs +87 -0
- package/dist/utilities/index.mjs.map +1 -0
- package/dist/writer.d.mts +58 -0
- package/dist/writer.d.mts.map +1 -0
- package/dist/writer.mjs +357 -0
- package/dist/writer.mjs.map +1 -0
- package/dist/xmlParseStream.d.mts +138 -0
- package/dist/xmlParseStream.d.mts.map +1 -0
- package/dist/xmlParseStream.mjs +313 -0
- package/dist/xmlParseStream.mjs.map +1 -0
- package/package.json +100 -0
- package/src/converters/fromLossless.ts +80 -0
- package/src/converters/fromLossy.ts +180 -0
- package/src/converters/lossless.ts +116 -0
- package/src/converters/lossy.ts +274 -0
- package/src/parser.ts +728 -0
- package/src/sax.ts +157 -0
- package/src/saxEngine.ts +1157 -0
- package/src/utilities/escapeRegExp.ts +19 -0
- package/src/utilities/filter.ts +63 -0
- package/src/utilities/getElementById.ts +21 -0
- package/src/utilities/getElementsByClassName.ts +22 -0
- package/src/utilities/htmlConstants.ts +26 -0
- package/src/utilities/index.ts +7 -0
- package/src/utilities/isElementNode.ts +19 -0
- package/src/utilities/isTextNode.ts +19 -0
- package/src/utilities/toContentString.ts +23 -0
- package/src/writer.ts +650 -0
- package/src/xmlParseStream.ts +597 -0
package/dist/writer.mjs
ADDED
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
import { n as HTML_VOID_ELEMENTS } from "./htmlConstants-D6fsKbZ-.mjs";
|
|
2
|
+
import { fromLossy } from "./converters/fromLossy.mjs";
|
|
3
|
+
import { fromLossless } from "./converters/fromLossless.mjs";
|
|
4
|
+
import { escapeAttribute, escapeText, escapeUTF8 } from "entities";
|
|
5
|
+
//#region src/writer.ts
|
|
6
|
+
// Character codes of the two sigils that may lead a tag name.
const BANG = 0x21; // "!" (declarations such as !DOCTYPE)
const QUESTION = 0x3f; // "?" (processing instructions such as ?xml)

/**
 * Matches any character that must never appear in a tag or attribute name:
 * the structural delimiters `<`, `>` and `=`, both quote characters, and any
 * whitespace. Interpolating such a character unchecked would break the
 * well-formedness of the generated markup.
 */
const INVALID_NAME_CHARS = /[<>=\s"']/;
|
|
14
|
+
/**
 * Guard a tag name before it is interpolated into markup.
 *
 * A single leading `?` (processing instruction) or `!` (declaration) is
 * tolerated; whatever follows it is checked against INVALID_NAME_CHARS.
 *
 * @param {string} tag - Tag name as stored on the node.
 * @returns {void}
 * @throws {Error} When the name is empty or contains a forbidden character.
 */
function validateTagName(tag) {
  if (tag.length === 0) {
    throw new Error("Invalid tag name: tag name must not be empty");
  }
  const leadCode = tag.charCodeAt(0);
  const hasSigil = leadCode === QUESTION || leadCode === BANG;
  const bareName = hasSigil ? tag.substring(1) : tag;
  // A lone "?" or "!" leaves nothing to inspect and is accepted as-is.
  if (bareName.length !== 0 && INVALID_NAME_CHARS.test(bareName)) {
    throw new Error(`Invalid tag name: "${tag}" contains forbidden characters`);
  }
}
|
|
20
|
+
/**
 * Guard every attribute name of a node before it is interpolated into markup.
 *
 * @param {Record<string, unknown>} attributes - Attribute map of a node; only
 *   its own enumerable keys are inspected, values are ignored.
 * @returns {void}
 * @throws {Error} When a name is empty, or contains a character matched by
 *   INVALID_NAME_CHARS.
 */
function validateAttributeNames(attributes) {
  for (const key of Object.keys(attributes)) {
    // Report the empty name on its own: `"" contains forbidden characters`
    // is misleading, and validateTagName already words this case separately.
    if (key.length === 0) {
      throw new Error("Invalid attribute name: attribute name must not be empty");
    }
    if (INVALID_NAME_CHARS.test(key)) {
      throw new Error(`Invalid attribute name: "${key}" contains forbidden characters`);
    }
  }
}
|
|
27
|
+
/**
|
|
28
|
+
* Recursion depth at which the circular-reference WeakSet guard kicks in.
|
|
29
|
+
* Below this depth, no WeakSet overhead is incurred — the natural call-stack
|
|
30
|
+
* limit provides a safety net. Typical XML documents have depth < 10.
|
|
31
|
+
*/
|
|
32
|
+
// The writers compare their `depth` argument against this value
// (`depth >= CIRCULAR_CHECK_DEPTH`) before recording nodes in a WeakSet.
const CIRCULAR_CHECK_DEPTH = 16;
|
|
33
|
+
/**
 * Serialize the given input to a markup string.
 *
 * The input is first normalised by `toDom`. When no options object is given,
 * or none of `pretty`, `entities` and `html` is truthy, the result comes from
 * `compactWrite`; otherwise it comes from `fullWriter`.
 *
 * @param {*} input - Value to serialize; any falsy value yields `""`.
 * @param {{pretty?: boolean|string, entities?: boolean, html?: boolean}} [options]
 *   Formatting options.
 * @returns {string} The serialized markup.
 */
function write(input, options) {
  if (!input) {
    return "";
  }
  const dom = toDom(input);
  const needsFullWriter =
    Boolean(options) && Boolean(options.pretty || options.entities || options.html);
  return needsFullWriter ? fullWriter(dom, options) : compactWrite(dom);
}
|
|
39
|
+
/**
|
|
40
|
+
* Test whether a string is a simple keyword that can appear unquoted in a
|
|
41
|
+
* DOCTYPE declaration (e.g. `html`, `PUBLIC`, `SYSTEM`).
|
|
42
|
+
* Anything else (URIs, public identifiers) must be double-quoted.
|
|
43
|
+
*/
|
|
44
|
+
// Anchored pattern: one ASCII letter followed by ASCII letters, digits, "_" or "-".
// The writers test null-valued declaration keys against it to decide whether
// the key is emitted bare or wrapped in double quotes.
const SIMPLE_KEYWORD = /^[A-Za-z][A-Za-z0-9_-]*$/;
|
|
45
|
+
/**
 * Fast-path serializer: compact output, no entity encoding, no HTML mode.
 *
 * String children are appended verbatim. Element nodes are written as
 * `<tag attrs>children</tag>`; tags starting with `?` are closed with `?>`
 * and tags starting with `!` with `>`, and neither gets children or a
 * closing tag.
 *
 * Attribute values are wrapped in double quotes, or in single quotes when
 * the value itself contains a double quote. When the value contains both
 * quote characters it is single-quoted and every `'` inside it is written
 * as `&apos;` so the attribute stays well-formed.
 *
 * @param {object|Array<object|string>} input - A node (`tagName`,
 *   `attributes`, `children`) or an array of nodes and strings.
 * @returns {string} The serialized markup.
 * @throws {Error} "Circular reference detected in TNode tree" when a node is
 *   reached again while it is still being written at or below
 *   CIRCULAR_CHECK_DEPTH; also whatever the name validators throw.
 */
function compactWrite(input) {
  let out = "";
  // Nodes currently being written at depth >= CIRCULAR_CHECK_DEPTH.
  // Allocated lazily so shallow documents never pay for the WeakSet.
  let activePath = null;

  /** Render one attribute, including its leading space. */
  function formatAttribute(name, value, isDeclaration) {
    if (value === null) {
      // Valueless entry: declaration keys that are not plain keywords
      // (public identifiers, URIs) are emitted as quoted literals.
      return isDeclaration && !SIMPLE_KEYWORD.test(name) ? ' "' + name + '"' : " " + name;
    }
    if (value.indexOf('"') === -1) return " " + name + '="' + value + '"';
    if (value.indexOf("'") === -1) return " " + name + "='" + value + "'";
    // Both quote types present: single-quote the value and escape the inner
    // single quotes, otherwise the first one would end the attribute early.
    return " " + name + "='" + value.replace(/'/g, "&apos;") + "'";
  }

  /** Append every child: strings verbatim, nodes through writeNode. */
  function writeChildren(nodes, depth) {
    for (let i = 0; i < nodes.length; i++) {
      const child = nodes[i];
      if (typeof child === "string") out += child;
      else writeNode(child, depth);
    }
  }

  /** Write one node, guarding deep recursion against cycles. */
  function writeNode(node, depth) {
    const tracked = depth >= CIRCULAR_CHECK_DEPTH;
    if (tracked) {
      if (activePath === null) activePath = new WeakSet();
      if (activePath.has(node)) throw new Error("Circular reference detected in TNode tree");
      activePath.add(node);
    }
    emitNode(node, depth);
    // Forget the node once its subtree is done: only a node that is its own
    // ancestor is a cycle. A node object that is merely reused elsewhere in
    // the tree must serialize the same way it does at shallow depth.
    if (tracked) activePath.delete(node);
  }

  /** Emit the markup for one node and recurse into its children. */
  function emitNode(node, depth) {
    const tag = node.tagName;
    validateTagName(tag);
    const attributes = node.attributes;
    const firstChar = tag.charCodeAt(0);
    const isDeclaration = firstChar === BANG;
    // Declaration keys are identifiers/literals, not attribute names.
    if (attributes !== null && !isDeclaration) validateAttributeNames(attributes);

    out += "<" + tag;
    if (attributes !== null) {
      const keys = Object.keys(attributes);
      for (let j = 0; j < keys.length; j++) {
        out += formatAttribute(keys[j], attributes[keys[j]], isDeclaration);
      }
    }
    if (firstChar === QUESTION) {
      out += "?>";
      return;
    }
    out += ">";
    if (isDeclaration) return;

    writeChildren(node.children, depth + 1);
    out += "</" + tag + ">";
  }

  writeChildren(Array.isArray(input) ? input : [input], 0);
  return out;
}
|
|
105
|
+
/**
 * Full-featured serializer: entity encoding, HTML mode and/or pretty printing.
 *
 * Options read from `options`:
 * - `pretty`: falsy for compact output; a non-empty string is used as the
 *   indent unit; any other truthy value indents with two spaces.
 * - `entities`: when truthy, text goes through `escapeText` and attribute
 *   values through `escapeAttribute` (both become `escapeUTF8` in HTML mode).
 *   Strings starting with `<!--` are always written verbatim.
 * - `html`: when truthy, tags listed in HTML_VOID_ELEMENTS are written as
 *   `<tag ...>` with no children and no closing tag.
 *
 * In both layouts tags starting with `?` are closed with `?>` and tags
 * starting with `!` with `>`, without children. Attribute values are
 * double-quoted, or single-quoted when they contain a double quote; when
 * they contain both quote types the inner `'` is written as `&apos;`.
 *
 * Pretty layout: childless elements self-close (`<tag/>`); elements whose
 * children are all strings (with at least one non-comment string) are written
 * on one line with each text trimmed and joined by a single space; otherwise
 * every child goes on its own indented line, non-comment text is trimmed and
 * empty text is dropped.
 *
 * @param {object|Array<object|string>} input - A node (`tagName`,
 *   `attributes`, `children`) or an array of nodes and strings.
 * @param {{pretty?: boolean|string, entities?: boolean, html?: boolean}} options
 *   Formatting options (required).
 * @returns {string} The serialized markup.
 * @throws {Error} "Circular reference detected in TNode tree" when a node is
 *   reached again while it is still being written at or below
 *   CIRCULAR_CHECK_DEPTH; also whatever the name validators throw.
 */
function fullWriter(input, options) {
  // `pretty: true` indents with two spaces, as the option is documented.
  let indent = "";
  if (options.pretty) {
    indent = typeof options.pretty === "string" ? options.pretty : "  ";
  }

  const encodeEntities = !!options.entities;
  const htmlMode = !!options.html;
  const identity = (text) => text;
  const encodeRawText = encodeEntities ? (htmlMode ? escapeUTF8 : escapeText) : identity;
  /** Encode text content, but pass comments through verbatim. */
  const encodeTextContent = (text) => (text.startsWith("<!--") ? text : encodeRawText(text));
  const encodeAttributeValue = encodeEntities
    ? htmlMode
      ? escapeUTF8
      : escapeAttribute
    : identity;
  const voidSet = htmlMode ? new Set(HTML_VOID_ELEMENTS) : null;

  let out = "";
  // Nodes currently being written at depth >= CIRCULAR_CHECK_DEPTH.
  // Allocated lazily so shallow documents never pay for the WeakSet.
  let activePath = null;

  /** Start tracking `node` if it is deep enough; returns whether it is tracked. */
  function enterNode(node, depth) {
    if (depth < CIRCULAR_CHECK_DEPTH) return false;
    if (activePath === null) activePath = new WeakSet();
    if (activePath.has(node)) throw new Error("Circular reference detected in TNode tree");
    activePath.add(node);
    return true;
  }

  /**
   * Stop tracking `node` once its subtree is written. Only a node that is its
   * own ancestor is a cycle; a node object reused elsewhere in the tree must
   * serialize the same way it does at shallow depth.
   */
  function leaveNode(node, tracked) {
    if (tracked) activePath.delete(node);
  }

  /** Append all attributes of a node (each with a leading space). */
  function writeAttributes(attributes, isDeclaration) {
    if (attributes === null) return;
    const keys = Object.keys(attributes);
    for (let j = 0; j < keys.length; j++) {
      const key = keys[j];
      const rawValue = attributes[key];
      if (rawValue === null) {
        // Valueless entry: declaration keys that are not plain keywords
        // (public identifiers, URIs) are emitted as quoted literals.
        out += isDeclaration && !SIMPLE_KEYWORD.test(key) ? ' "' + key + '"' : " " + key;
        continue;
      }
      const encoded = encodeAttributeValue(rawValue);
      if (encoded.indexOf('"') === -1) out += " " + key + '="' + encoded + '"';
      else if (encoded.indexOf("'") === -1) out += " " + key + "='" + encoded + "'";
      // Both quote types present: single-quote the value and escape the
      // inner single quotes, otherwise the first one would end the attribute.
      else out += " " + key + "='" + encoded.replace(/'/g, "&apos;") + "'";
    }
  }

  // ---- Compact layout -----------------------------------------------------

  /** Append every child: encoded text for strings, markup for nodes. */
  function compactWriteChildren(nodes, depth) {
    for (let i = 0; i < nodes.length; i++) {
      const child = nodes[i];
      if (typeof child === "string") out += encodeTextContent(child);
      else compactWriteNode(child, depth);
    }
  }

  /** Write one node in compact layout, guarding deep recursion against cycles. */
  function compactWriteNode(node, depth) {
    const tracked = enterNode(node, depth);
    compactEmitNode(node, depth);
    leaveNode(node, tracked);
  }

  /** Emit the compact markup for one node and recurse into its children. */
  function compactEmitNode(node, depth) {
    const tag = node.tagName;
    validateTagName(tag);
    const attributes = node.attributes;
    const firstChar = tag.charCodeAt(0);
    const isDeclaration = firstChar === BANG;
    // Declaration keys are identifiers/literals, not attribute names.
    if (attributes !== null && !isDeclaration) validateAttributeNames(attributes);

    out += "<" + tag;
    writeAttributes(attributes, isDeclaration);
    if (firstChar === QUESTION) {
      out += "?>";
      return;
    }
    out += ">";
    // Declarations and HTML void elements have no content and no closing tag.
    if (isDeclaration || (voidSet !== null && voidSet.has(tag))) return;

    compactWriteChildren(node.children, depth + 1);
    out += "</" + tag + ">";
  }

  // ---- Pretty layout ------------------------------------------------------

  /** True when at least one child is a string that is not a comment. */
  function hasTextChildren(nodes) {
    for (let i = 0; i < nodes.length; i++) {
      const child = nodes[i];
      if (typeof child === "string" && !child.startsWith("<!--")) return true;
    }
    return false;
  }

  /** Write one node in pretty layout; falsy nodes are skipped. */
  function prettyWriteNode(node, depth) {
    if (!node) return;
    const tracked = enterNode(node, depth);
    prettyEmitNode(node, depth);
    leaveNode(node, tracked);
  }

  /** Emit the indented markup for one node and recurse into its children. */
  function prettyEmitNode(node, depth) {
    const tag = node.tagName;
    validateTagName(tag);
    const padding = indent.repeat(depth);
    const firstChar = tag.charCodeAt(0);
    const isDeclaration = firstChar === BANG;
    // Declaration keys are identifiers/literals, not attribute names.
    if (node.attributes !== null && !isDeclaration) validateAttributeNames(node.attributes);

    out += padding + "<" + tag;
    writeAttributes(node.attributes, isDeclaration);

    if (firstChar === QUESTION) {
      out += "?>";
      return;
    }
    if (isDeclaration) {
      out += ">";
      return;
    }

    const children = node.children;
    const childCount = children.length;

    // HTML void element: open tag only.
    if (voidSet !== null && voidSet.has(tag)) {
      out += ">";
      return;
    }
    // Childless element: self-close.
    if (childCount === 0) {
      out += "/>";
      return;
    }
    out += ">";

    const hasText = hasTextChildren(children);
    let hasElements = false;
    if (hasText) {
      for (let i = 0; i < childCount; i++) {
        if (typeof children[i] !== "string") {
          hasElements = true;
          break;
        }
      }
    }

    // Strings only: keep everything on one line, trimmed and space-joined.
    if (hasText && !hasElements) {
      let first = true;
      for (let i = 0; i < childCount; i++) {
        const text = children[i].trim();
        if (text.length === 0) continue;
        if (!first) out += " ";
        first = false;
        out += encodeTextContent(text);
      }
      out += "</" + tag + ">";
      return;
    }

    // Elements (optionally mixed with text/comments): one child per line.
    const childPadding = indent.repeat(depth + 1);
    for (let i = 0; i < childCount; i++) {
      const child = children[i];
      if (typeof child !== "string") {
        out += "\n";
        prettyWriteNode(child, depth + 1);
      } else if (child.startsWith("<!--")) {
        // Comments pass through untrimmed.
        out += "\n" + childPadding + encodeTextContent(child);
      } else {
        const trimmed = child.trim();
        if (trimmed.length > 0) out += "\n" + childPadding + encodeTextContent(trimmed);
      }
    }
    out += "\n" + padding + "</" + tag + ">";
  }

  // ---- Entry --------------------------------------------------------------

  const roots = Array.isArray(input) ? input : [input];
  if (!indent) {
    compactWriteChildren(roots, 0);
    return out;
  }
  for (let i = 0; i < roots.length; i++) {
    const root = roots[i];
    if (i > 0) out += "\n";
    if (typeof root === "string") out += encodeTextContent(root);
    else prettyWriteNode(root, 0);
  }
  return out;
}
|
|
313
|
+
/**
|
|
314
|
+
* Detect the input format and convert to a `(TNode | string)[]` DOM array
|
|
315
|
+
* suitable for the writer functions.
|
|
316
|
+
*
|
|
317
|
+
* Detection heuristics:
|
|
318
|
+
* 1. A single TNode (has `tagName` string property) → pass through
|
|
319
|
+
* 2. An array whose first non-string element has `tagName` → already DOM
|
|
320
|
+
* 3. An array whose first non-string element has `$text`, `$comment`, `$attr`,
|
|
321
|
+
* or a single key mapping to an array → lossless format
|
|
322
|
+
* 4. Anything else (including `null`, bare strings, objects without `tagName`) → lossy
|
|
323
|
+
*/
|
|
324
|
+
/**
 * Normalise writer input into something the writer functions accept.
 *
 * - `null` / `undefined` and empty arrays become `[]`.
 * - A non-array value that passes `isTNode` is returned unchanged; any other
 *   non-array value is handed to `fromLossy`.
 * - For a non-empty array, the first element that is not a string decides:
 *   no such element (or an `undefined` one) returns the array unchanged, a
 *   TNode returns the array unchanged, a lossless entry goes through
 *   `fromLossless`, and anything else goes through `fromLossy`.
 *
 * @param {*} input - Raw writer input.
 * @returns {*} The input itself, an empty array, or a converter result.
 */
function toDom(input) {
  if (input === null || input === undefined) return [];

  if (!Array.isArray(input)) {
    const isSingleNode = typeof input === "object" && isTNode(input);
    return isSingleNode ? input : fromLossy(input);
  }

  if (input.length === 0) return [];

  // The first non-string element is the format sample.
  const sample = input.find((entry) => typeof entry !== "string");
  if (sample === undefined) return input;

  if (typeof sample === "object" && sample !== null) {
    if (isTNode(sample)) return input;
    if (isLosslessEntry(sample)) return fromLossless(input);
  }
  return fromLossy(input);
}
|
|
341
|
+
/** Check if a value looks like a TNode (has tagName string + children array). */
|
|
342
|
+
/**
 * Structural check for a TNode: a non-null object whose `tagName` is a string
 * and whose `children` is an array.
 *
 * @param {*} value - Candidate value.
 * @returns {boolean} Whether the value has the TNode shape.
 */
function isTNode(value) {
  if (value === null || typeof value !== "object") return false;
  return typeof value.tagName === "string" && Array.isArray(value.children);
}
|
|
345
|
+
/** Check if a value looks like a LosslessEntry. */
|
|
346
|
+
/**
 * Structural check for a lossless entry: a non-null object with at least one
 * own enumerable key that either exposes `$text`, `$comment` or `$attr`, or
 * has exactly one own key whose value is an array.
 *
 * @param {*} value - Candidate value.
 * @returns {boolean} Whether the value has the lossless-entry shape.
 */
function isLosslessEntry(value) {
  if (value === null || typeof value !== "object") return false;

  const ownKeys = Object.keys(value);
  if (ownKeys.length === 0) return false;

  const hasMarker = ["$text", "$comment", "$attr"].some((marker) => marker in value);
  if (hasMarker) return true;

  return ownKeys.length === 1 && Array.isArray(value[ownKeys[0]]);
}
|
|
354
|
+
//#endregion
|
|
355
|
+
export { write };
|
|
356
|
+
|
|
357
|
+
//# sourceMappingURL=writer.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"writer.mjs","names":[],"sources":["../src/writer.ts"],"sourcesContent":["import { escapeText, escapeAttribute, escapeUTF8 } from 'entities';\nimport type { TNode } from '#src/parser.ts';\nimport type { LossyValue } from '#src/converters/lossy.ts';\nimport type { LosslessEntry } from '#src/converters/lossless.ts';\nimport { fromLossy } from '#src/converters/fromLossy.ts';\nimport { fromLossless } from '#src/converters/fromLossless.ts';\nimport { HTML_VOID_ELEMENTS } from '#src/utilities/htmlConstants.ts';\n// @generated:char-codes:begin\nconst BANG = 33; // !\nconst QUESTION = 63; // ?\n// @generated:char-codes:end\n\n/**\n * Characters forbidden in XML tag names and attribute names.\n * Covers structural delimiters (`<`, `>`, `=`), quote characters, and\n * whitespace that would break well-formedness if interpolated unchecked.\n */\nconst INVALID_NAME_CHARS = /[<>=\\s\"']/;\n\nfunction validateTagName(tag: string): void {\n if (tag.length === 0) {\n throw new Error('Invalid tag name: tag name must not be empty');\n }\n // Allow leading `?` (PI) or `!` (DOCTYPE), validate the rest\n const name =\n tag.charCodeAt(0) === QUESTION || tag.charCodeAt(0) === BANG\n ? tag.substring(1)\n : tag;\n if (name.length === 0) return; // bare `?` or `!` is degenerate but harmless\n if (INVALID_NAME_CHARS.test(name)) {\n throw new Error(`Invalid tag name: \"${tag}\" contains forbidden characters`);\n }\n}\n\nfunction validateAttributeNames(attributes: Record<string, unknown>): void {\n const keys = Object.keys(attributes);\n for (let i = 0; i < keys.length; i++) {\n const key = keys[i]!;\n if (key.length === 0 || INVALID_NAME_CHARS.test(key)) {\n throw new Error(\n `Invalid attribute name: \"${key}\" contains forbidden characters`,\n );\n }\n }\n}\n\n/**\n * Recursion depth at which the circular-reference WeakSet guard kicks in.\n * Below this depth, no WeakSet overhead is incurred — the natural call-stack\n * limit provides a safety net. 
Typical XML documents have depth < 10.\n */\nconst CIRCULAR_CHECK_DEPTH = 16;\n\n/** Options for write. */\nexport interface WriterOptions {\n /**\n * Pretty-print with indentation. When enabled, each element is placed on\n * its own line with proper nesting indentation.\n *\n * - `true` uses two spaces as the indent.\n * - A string value is used as the indent directly (e.g. `\"\\t\"`, `\" \"`).\n *\n * Text-only elements are kept inline: `<name>Alice</name>`.\n * Mixed content (text interleaved with elements) is also kept inline to\n * avoid altering whitespace semantics.\n */\n pretty?: boolean | string;\n /**\n * Encode special characters in text content and attribute values as XML\n * entities. In XML mode (default), `&`, `<`, `>` are encoded in text and\n * `&`, `\"`, `'` are encoded in attributes. In HTML mode (`html: true`),\n * the full HTML named entity set is used (e.g. `©`, `é`).\n *\n * Defaults to `false` — text and attribute values are written verbatim.\n */\n entities?: boolean;\n /**\n * Enable HTML mode. When enabled:\n * - Void elements (`<br>`, `<img>`, `<hr>`, etc.) are self-closed without\n * a closing tag (e.g. `<br>` instead of `<br></br>` or `<br/>`).\n * - When `entities` is also `true`, uses HTML named entities\n * (e.g. `©` instead of `©`) for encoding.\n */\n html?: boolean;\n}\n\n/** Input types accepted by `write()`. 
*/\nexport type WriterInput =\n | TNode\n | (TNode | string)[]\n | LossyValue\n | LossyValue[]\n | LosslessEntry[];\n\n/**\n * Serialize a parsed DOM, lossy object, or lossless entry array back to XML.\n *\n * Input format is auto-detected:\n * - `TNode` or `(TNode | string)[]` — DOM tree, serialized directly\n * - `LossyValue` or `LossyValue[]` — converted to DOM via `fromLossy()` first\n * - `LosslessEntry[]` — converted to DOM via `fromLossless()` first\n *\n * @param input - The node(s), lossy object(s), or lossless entries to serialize\n * @param options - Formatting options\n * @returns XML string\n */\nexport function write(\n input: TNode | (TNode | string)[],\n options?: WriterOptions,\n): string;\nexport function write(\n input: LossyValue | LossyValue[],\n options?: WriterOptions,\n): string;\nexport function write(input: LosslessEntry[], options?: WriterOptions): string;\nexport function write(input: WriterInput, options?: WriterOptions): string;\nexport function write(input: WriterInput, options?: WriterOptions): string {\n if (!input) return '';\n\n const dom = toDom(input);\n\n // Fast path: no options — skip all option parsing, closure creation,\n // identity function allocation, and voidSet construction.\n if (!options || (!options.pretty && !options.entities && !options.html)) {\n return compactWrite(dom);\n }\n\n return fullWriter(dom, options);\n}\n\n// ---------------------------------------------------------------------------\n// Fast path — compact output, no entities, no HTML mode\n// ---------------------------------------------------------------------------\n\n/**\n * Test whether a string is a simple keyword that can appear unquoted in a\n * DOCTYPE declaration (e.g. 
`html`, `PUBLIC`, `SYSTEM`).\n * Anything else (URIs, public identifiers) must be double-quoted.\n */\nconst SIMPLE_KEYWORD = /^[A-Za-z][A-Za-z0-9_-]*$/;\n\nfunction compactWrite(input: TNode | (TNode | string)[]): string {\n let out = '';\n let seen: WeakSet<TNode> | null = null;\n\n function writeChildren(nodes: (TNode | string)[], depth: number): void {\n for (let i = 0; i < nodes.length; i++) {\n const node = nodes[i]!;\n if (typeof node === 'string') {\n out += node;\n } else {\n writeNode(node, depth);\n }\n }\n }\n\n function writeNode(node: TNode, depth: number): void {\n if (depth >= CIRCULAR_CHECK_DEPTH) {\n if (seen === null) seen = new WeakSet<TNode>();\n if (seen.has(node)) {\n throw new Error('Circular reference detected in TNode tree');\n }\n seen.add(node);\n }\n const tag = node.tagName;\n validateTagName(tag);\n const attributes = node.attributes;\n const firstChar = tag.charCodeAt(0);\n // Skip attribute name validation for declarations (!DOCTYPE etc.)\n // where keys are quoted identifiers, not XML attribute names\n if (attributes !== null && firstChar !== BANG) {\n validateAttributeNames(attributes);\n }\n if (attributes === null) {\n // No attributes — combine open tag into one concat\n if (firstChar === QUESTION) {\n out += '<' + tag + '?>';\n return;\n }\n if (firstChar === BANG) {\n out += '<' + tag + '>';\n return;\n }\n out += '<' + tag + '>';\n } else {\n out += '<' + tag;\n const isDeclaration = firstChar === BANG;\n const keys = Object.keys(attributes);\n for (let j = 0; j < keys.length; j++) {\n const attributeName = keys[j]!;\n const attributeValue = attributes[attributeName];\n if (attributeValue === null) {\n if (isDeclaration && !SIMPLE_KEYWORD.test(attributeName)) {\n out += ' \"' + attributeName + '\"';\n } else {\n out += ' ' + attributeName;\n }\n } else if (attributeValue.indexOf('\"') === -1) {\n out += ' ' + attributeName + '=\"' + attributeValue + '\"';\n } else if (attributeValue.indexOf(\"'\") === -1) {\n out += ' ' + 
attributeName + \"='\" + attributeValue + \"'\";\n } else {\n // Value contains both quote types — escape single quotes\n out +=\n ' ' +\n attributeName +\n \"='\" +\n attributeValue.replace(/'/g, ''') +\n \"'\";\n }\n }\n if (firstChar === QUESTION) {\n out += '?>';\n return;\n }\n if (firstChar === BANG) {\n out += '>';\n return;\n }\n out += '>';\n }\n writeChildren(node.children, depth + 1);\n out += '</' + tag + '>';\n }\n\n writeChildren(Array.isArray(input) ? input : [input], 0);\n return out;\n}\n\n// ---------------------------------------------------------------------------\n// Full-featured path — entities, HTML mode, and/or pretty printing\n// ---------------------------------------------------------------------------\n\nfunction fullWriter(\n input: TNode | (TNode | string)[],\n options: WriterOptions,\n): string {\n const indent = !options.pretty\n ? ''\n : typeof options.pretty === 'string'\n ? options.pretty\n : ' ';\n\n // Entity encoding functions — identity when disabled\n const encodeEntities = !!options.entities;\n const htmlMode = !!options.html;\n const encodeRawText: (input: string) => string = encodeEntities\n ? htmlMode\n ? escapeUTF8\n : escapeText\n : (input) => input;\n /** Encode text content, but pass comments through verbatim. */\n const encodeTextContent = (input: string): string =>\n input.startsWith('<!--') ? input : encodeRawText(input);\n const encodeAttributeValue: (input: string) => string = encodeEntities\n ? htmlMode\n ? escapeUTF8\n : escapeAttribute\n : (input) => input;\n\n // HTML void elements — self-close without </tag> in html mode\n const voidSet: Set<string> | null = htmlMode\n ? 
new Set(HTML_VOID_ELEMENTS)\n : null;\n\n // Compact path with entities/html support\n if (!indent) {\n let out = '';\n let seen: WeakSet<TNode> | null = null;\n\n function writeChildren(nodes: (TNode | string)[], depth: number): void {\n for (let i = 0; i < nodes.length; i++) {\n const node = nodes[i]!;\n if (typeof node === 'string') {\n out += encodeTextContent(node);\n } else {\n writeNode(node, depth);\n }\n }\n }\n\n function writeNode(node: TNode, depth: number): void {\n if (depth >= CIRCULAR_CHECK_DEPTH) {\n if (seen === null) seen = new WeakSet<TNode>();\n if (seen.has(node)) {\n throw new Error('Circular reference detected in TNode tree');\n }\n seen.add(node);\n }\n const tag = node.tagName;\n validateTagName(tag);\n const attributes = node.attributes;\n const firstChar = tag.charCodeAt(0);\n // Skip attribute name validation for declarations (!DOCTYPE etc.)\n // where keys are quoted identifiers, not XML attribute names\n if (attributes !== null && firstChar !== BANG) {\n validateAttributeNames(attributes);\n }\n if (attributes === null) {\n if (firstChar === QUESTION) {\n out += '<' + tag + '?>';\n return;\n }\n // Declaration tags (e.g. 
!DOCTYPE) — void in all modes\n if (firstChar === BANG) {\n out += '<' + tag + '>';\n return;\n }\n // HTML void elements self-close without a closing tag\n if (voidSet !== null && voidSet.has(tag)) {\n out += '<' + tag + '>';\n return;\n }\n out += '<' + tag + '>';\n } else {\n out += '<' + tag;\n const isDeclaration = firstChar === BANG;\n const keys = Object.keys(attributes);\n for (let j = 0; j < keys.length; j++) {\n const attributeName = keys[j]!;\n const attributeValue = attributes[attributeName];\n if (attributeValue === null) {\n if (isDeclaration && !SIMPLE_KEYWORD.test(attributeName)) {\n out += ' \"' + attributeName + '\"';\n } else {\n out += ' ' + attributeName;\n }\n } else {\n const encoded = encodeAttributeValue(attributeValue);\n if (encoded.indexOf('\"') === -1) {\n out += ' ' + attributeName + '=\"' + encoded + '\"';\n } else if (encoded.indexOf(\"'\") === -1) {\n out += ' ' + attributeName + \"='\" + encoded + \"'\";\n } else {\n // Value contains both quote types — escape single quotes\n out +=\n ' ' +\n attributeName +\n \"='\" +\n encoded.replace(/'/g, ''') +\n \"'\";\n }\n }\n }\n if (firstChar === QUESTION) {\n out += '?>';\n return;\n }\n // Declaration tags (e.g. !DOCTYPE) — void in all modes\n if (firstChar === BANG) {\n out += '>';\n return;\n }\n // HTML void elements self-close without a closing tag\n if (voidSet !== null && voidSet.has(tag)) {\n out += '>';\n return;\n }\n out += '>';\n }\n writeChildren(node.children, depth + 1);\n out += '</' + tag + '>';\n }\n\n writeChildren(Array.isArray(input) ? 
input : [input], 0);\n return out;\n }\n\n // Pretty path\n let out = '';\n let seen: WeakSet<TNode> | null = null;\n\n function hasTextChildren(nodes: (TNode | string)[]): boolean {\n for (let i = 0; i < nodes.length; i++) {\n const node = nodes[i];\n // Comments are structural (like elements) — they don't force inline mode\n if (typeof node === 'string' && !node.startsWith('<!--')) return true;\n }\n return false;\n }\n\n function prettyWriteAttributes(node: TNode, isDeclaration = false): void {\n if (node.attributes === null) return;\n const keys = Object.keys(node.attributes);\n for (let j = 0; j < keys.length; j++) {\n const key = keys[j]!;\n const attributeValue = node.attributes[key];\n if (attributeValue === null) {\n if (isDeclaration && !SIMPLE_KEYWORD.test(key)) {\n out += ' \"' + key + '\"';\n } else {\n out += ' ' + key;\n }\n } else {\n const encoded = encodeAttributeValue(attributeValue);\n if (encoded.indexOf('\"') === -1) {\n out += ' ' + key + '=\"' + encoded + '\"';\n } else if (encoded.indexOf(\"'\") === -1) {\n out += ' ' + key + \"='\" + encoded + \"'\";\n } else {\n // Value contains both quote types — escape single quotes\n out += ' ' + key + \"='\" + encoded.replace(/'/g, ''') + \"'\";\n }\n }\n }\n }\n\n function prettyWriteNode(node: TNode, depth: number): void {\n if (!node) return;\n if (depth >= CIRCULAR_CHECK_DEPTH) {\n if (seen === null) seen = new WeakSet<TNode>();\n if (seen.has(node)) {\n throw new Error('Circular reference detected in TNode tree');\n }\n seen.add(node);\n }\n const tag = node.tagName;\n validateTagName(tag);\n const padding = indent.repeat(depth);\n const firstChar = tag.charCodeAt(0);\n // Skip attribute name validation for declarations (!DOCTYPE etc.)\n // where keys are quoted identifiers, not XML attribute names\n if (node.attributes !== null && firstChar !== BANG) {\n validateAttributeNames(node.attributes);\n }\n\n // Processing instruction\n if (firstChar === QUESTION) {\n out += padding + '<' + tag;\n 
prettyWriteAttributes(node);\n out += '?>';\n return;\n }\n\n // Declaration tags (e.g. !DOCTYPE) — void in all modes\n if (firstChar === BANG) {\n out += padding + '<' + tag;\n prettyWriteAttributes(node, true);\n out += '>';\n return;\n }\n\n const children = node.children;\n const childrenLength = children.length;\n\n // HTML void elements — self-close without closing tag\n if (voidSet !== null && voidSet.has(tag)) {\n out += padding + '<' + tag;\n prettyWriteAttributes(node);\n out += '>';\n return;\n }\n\n // Empty element — self-close\n if (childrenLength === 0) {\n out += padding + '<' + tag;\n prettyWriteAttributes(node);\n out += '/>';\n return;\n }\n\n // Classify children\n const hasText = hasTextChildren(children);\n let hasElements = false;\n if (hasText) {\n for (let i = 0; i < childrenLength; i++) {\n if (typeof children[i] !== 'string') {\n hasElements = true;\n break;\n }\n }\n }\n\n // Text-only (no element children) — trim each text child, join with\n // a single space, and write inline on one line.\n if (hasText && !hasElements) {\n out += padding + '<' + tag;\n prettyWriteAttributes(node);\n out += '>';\n let first = true;\n for (let i = 0; i < childrenLength; i++) {\n const text = (children[i] as string).trim();\n if (text.length === 0) continue;\n if (!first) out += ' ';\n first = false;\n out += encodeTextContent(text);\n }\n out += '</' + tag + '>';\n return;\n }\n\n // Mixed content — trim text nodes, drop empty, place each non-empty\n // text and each element child on its own indented line.\n if (hasText) {\n const childPadding = indent.repeat(depth + 1);\n out += padding + '<' + tag;\n prettyWriteAttributes(node);\n out += '>';\n for (let i = 0; i < childrenLength; i++) {\n const child = children[i]!;\n if (typeof child === 'string') {\n // Comments pass through without trimming\n if (child.startsWith('<!--')) {\n out += '\\n' + childPadding + encodeTextContent(child);\n } else {\n const trimmed = child.trim();\n if (trimmed.length > 0) 
{\n out += '\\n' + childPadding + encodeTextContent(trimmed);\n }\n }\n } else {\n out += '\\n';\n prettyWriteNode(child, depth + 1);\n }\n }\n out += '\\n' + padding + '</' + tag + '>';\n return;\n }\n\n // Element-only children (and comments) — indent each child\n out += padding + '<' + tag;\n prettyWriteAttributes(node);\n out += '>';\n for (let i = 0; i < childrenLength; i++) {\n const child = children[i]!;\n if (typeof child === 'string') {\n // Comments get their own indented line\n out += '\\n' + indent.repeat(depth + 1) + encodeTextContent(child);\n } else {\n out += '\\n';\n prettyWriteNode(child, depth + 1);\n }\n }\n out += '\\n' + padding + '</' + tag + '>';\n }\n\n const nodes = Array.isArray(input) ? input : [input];\n for (let i = 0; i < nodes.length; i++) {\n const node = nodes[i]!;\n if (i > 0) out += '\\n';\n if (typeof node === 'string') {\n out += encodeTextContent(node);\n } else {\n prettyWriteNode(node, 0);\n }\n }\n\n return out;\n}\n\n// ---------------------------------------------------------------------------\n// Input format detection and conversion\n// ---------------------------------------------------------------------------\n\n/**\n * Detect the input format and convert to a `(TNode | string)[]` DOM array\n * suitable for the writer functions.\n *\n * Detection heuristics:\n * 1. A single TNode (has `tagName` string property) → pass through\n * 2. An array whose first non-string element has `tagName` → already DOM\n * 3. An array whose first non-string element has `$text`, `$comment`, `$attr`,\n * or a single key mapping to an array → lossless format\n * 4. 
Anything else (including `null`, bare strings, objects without `tagName`) → lossy\n */\nfunction toDom(\n input:\n | TNode\n | (TNode | string)[]\n | LossyValue\n | LossyValue[]\n | LosslessEntry[],\n): TNode | (TNode | string)[] {\n if (input === null || input === undefined) return [];\n\n // Single TNode — pass through\n if (!Array.isArray(input) && typeof input === 'object' && isTNode(input)) {\n return input as TNode;\n }\n\n // Non-object, non-array: bare string (lossy top-level string)\n if (!Array.isArray(input) && typeof input !== 'object') {\n return fromLossy(input as LossyValue);\n }\n\n // Non-array object without tagName → lossy object\n if (!Array.isArray(input)) {\n return fromLossy(input as LossyValue);\n }\n\n // Array — need to distinguish DOM, lossless, and lossy\n const array = input as unknown[];\n if (array.length === 0) return [];\n\n // Find the first non-string element to inspect\n let sample: unknown = undefined;\n for (let i = 0; i < array.length; i++) {\n if (typeof array[i] !== 'string') {\n sample = array[i];\n break;\n }\n }\n\n // All strings → could be DOM (text-only top level) — pass through\n if (sample === undefined) return array as string[];\n\n // TNode in the array → DOM format\n if (typeof sample === 'object' && sample !== null && isTNode(sample)) {\n return array as (TNode | string)[];\n }\n\n // Lossless entry: has $text, $comment, $attr, or single key → array value\n if (\n typeof sample === 'object' &&\n sample !== null &&\n isLosslessEntry(sample)\n ) {\n return fromLossless(array as LosslessEntry[]);\n }\n\n // Everything else → lossy\n return fromLossy(array as LossyValue[]);\n}\n\n/** Check if a value looks like a TNode (has tagName string + children array). 
*/\nfunction isTNode(value: unknown): value is TNode {\n return (\n typeof value === 'object' &&\n value !== null &&\n typeof (value as TNode).tagName === 'string' &&\n Array.isArray((value as TNode).children)\n );\n}\n\n/** Check if a value looks like a LosslessEntry. */\nfunction isLosslessEntry(value: unknown): value is LosslessEntry {\n if (typeof value !== 'object' || value === null) return false;\n const keys = Object.keys(value);\n if (keys.length === 0) return false;\n // Known lossless marker keys\n if ('$text' in value || '$comment' in value || '$attr' in value) return true;\n // Single key mapping to an array → element entry\n if (\n keys.length === 1 &&\n Array.isArray((value as Record<string, unknown>)[keys[0]!])\n ) {\n return true;\n }\n return false;\n}\n"],"mappings":";;;;;AAQA,MAAM,OAAO;AACb,MAAM,WAAW;;;;;;AAQjB,MAAM,qBAAqB;AAE3B,SAAS,gBAAgB,KAAmB;AAC1C,KAAI,IAAI,WAAW,EACjB,OAAM,IAAI,MAAM,+CAA+C;CAGjE,MAAM,OACJ,IAAI,WAAW,EAAE,KAAK,YAAY,IAAI,WAAW,EAAE,KAAK,OACpD,IAAI,UAAU,EAAE,GAChB;AACN,KAAI,KAAK,WAAW,EAAG;AACvB,KAAI,mBAAmB,KAAK,KAAK,CAC/B,OAAM,IAAI,MAAM,sBAAsB,IAAI,iCAAiC;;AAI/E,SAAS,uBAAuB,YAA2C;CACzE,MAAM,OAAO,OAAO,KAAK,WAAW;AACpC,MAAK,IAAI,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;EACpC,MAAM,MAAM,KAAK;AACjB,MAAI,IAAI,WAAW,KAAK,mBAAmB,KAAK,IAAI,CAClD,OAAM,IAAI,MACR,4BAA4B,IAAI,iCACjC;;;;;;;;AAUP,MAAM,uBAAuB;AAiE7B,SAAgB,MAAM,OAAoB,SAAiC;AACzE,KAAI,CAAC,MAAO,QAAO;CAEnB,MAAM,MAAM,MAAM,MAAM;AAIxB,KAAI,CAAC,WAAY,CAAC,QAAQ,UAAU,CAAC,QAAQ,YAAY,CAAC,QAAQ,KAChE,QAAO,aAAa,IAAI;AAG1B,QAAO,WAAW,KAAK,QAAQ;;;;;;;AAYjC,MAAM,iBAAiB;AAEvB,SAAS,aAAa,OAA2C;CAC/D,IAAI,MAAM;CACV,IAAI,OAA8B;CAElC,SAAS,cAAc,OAA2B,OAAqB;AACrE,OAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;GACrC,MAAM,OAAO,MAAM;AACnB,OAAI,OAAO,SAAS,SAClB,QAAO;OAEP,WAAU,MAAM,MAAM;;;CAK5B,SAAS,UAAU,MAAa,OAAqB;AACnD,MAAI,SAAS,sBAAsB;AACjC,OAAI,SAAS,KAAM,wBAAO,IAAI,SAAgB;AAC9C,OAAI,KAAK,IAAI,KAAK,CAChB,OAAM,IAAI,MAAM,4CAA4C;AAE9D,QAAK,IAAI,KAAK;;EAEhB,MAAM,MAAM,KAAK;AACjB,kBAAgB,IAAI;EACpB,MAAM,aAAa,KAAK;EACxB,MAAM,YAAY,IAAI,
WAAW,EAAE;AAGnC,MAAI,eAAe,QAAQ,cAAc,KACvC,wBAAuB,WAAW;AAEpC,MAAI,eAAe,MAAM;AAEvB,OAAI,cAAc,UAAU;AAC1B,WAAO,MAAM,MAAM;AACnB;;AAEF,OAAI,cAAc,MAAM;AACtB,WAAO,MAAM,MAAM;AACnB;;AAEF,UAAO,MAAM,MAAM;SACd;AACL,UAAO,MAAM;GACb,MAAM,gBAAgB,cAAc;GACpC,MAAM,OAAO,OAAO,KAAK,WAAW;AACpC,QAAK,IAAI,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;IACpC,MAAM,gBAAgB,KAAK;IAC3B,MAAM,iBAAiB,WAAW;AAClC,QAAI,mBAAmB,KACrB,KAAI,iBAAiB,CAAC,eAAe,KAAK,cAAc,CACtD,QAAO,QAAO,gBAAgB;QAE9B,QAAO,MAAM;aAEN,eAAe,QAAQ,KAAI,KAAK,GACzC,QAAO,MAAM,gBAAgB,QAAO,iBAAiB;aAC5C,eAAe,QAAQ,IAAI,KAAK,GACzC,QAAO,MAAM,gBAAgB,OAAO,iBAAiB;QAGrD,QACE,MACA,gBACA,OACA,eAAe,QAAQ,MAAM,SAAS,GACtC;;AAGN,OAAI,cAAc,UAAU;AAC1B,WAAO;AACP;;AAEF,OAAI,cAAc,MAAM;AACtB,WAAO;AACP;;AAEF,UAAO;;AAET,gBAAc,KAAK,UAAU,QAAQ,EAAE;AACvC,SAAO,OAAO,MAAM;;AAGtB,eAAc,MAAM,QAAQ,MAAM,GAAG,QAAQ,CAAC,MAAM,EAAE,EAAE;AACxD,QAAO;;AAOT,SAAS,WACP,OACA,SACQ;CACR,MAAM,SAAS,CAAC,QAAQ,SACpB,KACA,OAAO,QAAQ,WAAW,WACxB,QAAQ,SACR;CAGN,MAAM,iBAAiB,CAAC,CAAC,QAAQ;CACjC,MAAM,WAAW,CAAC,CAAC,QAAQ;CAC3B,MAAM,gBAA2C,iBAC7C,WACE,aACA,cACD,UAAU;;CAEf,MAAM,qBAAqB,UACzB,MAAM,WAAW,OAAO,GAAG,QAAQ,cAAc,MAAM;CACzD,MAAM,uBAAkD,iBACpD,WACE,aACA,mBACD,UAAU;CAGf,MAAM,UAA8B,WAChC,IAAI,IAAI,mBAAmB,GAC3B;AAGJ,KAAI,CAAC,QAAQ;EACX,IAAI,MAAM;EACV,IAAI,OAA8B;EAElC,SAAS,cAAc,OAA2B,OAAqB;AACrE,QAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;IACrC,MAAM,OAAO,MAAM;AACnB,QAAI,OAAO,SAAS,SAClB,QAAO,kBAAkB,KAAK;QAE9B,WAAU,MAAM,MAAM;;;EAK5B,SAAS,UAAU,MAAa,OAAqB;AACnD,OAAI,SAAS,sBAAsB;AACjC,QAAI,SAAS,KAAM,wBAAO,IAAI,SAAgB;AAC9C,QAAI,KAAK,IAAI,KAAK,CAChB,OAAM,IAAI,MAAM,4CAA4C;AAE9D,SAAK,IAAI,KAAK;;GAEhB,MAAM,MAAM,KAAK;AACjB,mBAAgB,IAAI;GACpB,MAAM,aAAa,KAAK;GACxB,MAAM,YAAY,IAAI,WAAW,EAAE;AAGnC,OAAI,eAAe,QAAQ,cAAc,KACvC,wBAAuB,WAAW;AAEpC,OAAI,eAAe,MAAM;AACvB,QAAI,cAAc,UAAU;AAC1B,YAAO,MAAM,MAAM;AACnB;;AAGF,QAAI,cAAc,MAAM;AACtB,YAAO,MAAM,MAAM;AACnB;;AAGF,QAAI,YAAY,QAAQ,QAAQ,IAAI,IAAI,EAAE;AACxC,YAAO,MAAM,MAAM;AACnB;;AAEF,WAAO,MAAM,MAAM;UACd;AACL,WAAO,MAAM;IACb,MAAM,gBAAgB,cAAc;IACpC,MAAM,OAAO,OAAO,KAAK,WAAW;AACpC,SAAK,IAAI,IAAI,GAAG,
IAAI,KAAK,QAAQ,KAAK;KACpC,MAAM,gBAAgB,KAAK;KAC3B,MAAM,iBAAiB,WAAW;AAClC,SAAI,mBAAmB,KACrB,KAAI,iBAAiB,CAAC,eAAe,KAAK,cAAc,CACtD,QAAO,QAAO,gBAAgB;SAE9B,QAAO,MAAM;UAEV;MACL,MAAM,UAAU,qBAAqB,eAAe;AACpD,UAAI,QAAQ,QAAQ,KAAI,KAAK,GAC3B,QAAO,MAAM,gBAAgB,QAAO,UAAU;eACrC,QAAQ,QAAQ,IAAI,KAAK,GAClC,QAAO,MAAM,gBAAgB,OAAO,UAAU;UAG9C,QACE,MACA,gBACA,OACA,QAAQ,QAAQ,MAAM,SAAS,GAC/B;;;AAIR,QAAI,cAAc,UAAU;AAC1B,YAAO;AACP;;AAGF,QAAI,cAAc,MAAM;AACtB,YAAO;AACP;;AAGF,QAAI,YAAY,QAAQ,QAAQ,IAAI,IAAI,EAAE;AACxC,YAAO;AACP;;AAEF,WAAO;;AAET,iBAAc,KAAK,UAAU,QAAQ,EAAE;AACvC,UAAO,OAAO,MAAM;;AAGtB,gBAAc,MAAM,QAAQ,MAAM,GAAG,QAAQ,CAAC,MAAM,EAAE,EAAE;AACxD,SAAO;;CAIT,IAAI,MAAM;CACV,IAAI,OAA8B;CAElC,SAAS,gBAAgB,OAAoC;AAC3D,OAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;GACrC,MAAM,OAAO,MAAM;AAEnB,OAAI,OAAO,SAAS,YAAY,CAAC,KAAK,WAAW,OAAO,CAAE,QAAO;;AAEnE,SAAO;;CAGT,SAAS,sBAAsB,MAAa,gBAAgB,OAAa;AACvE,MAAI,KAAK,eAAe,KAAM;EAC9B,MAAM,OAAO,OAAO,KAAK,KAAK,WAAW;AACzC,OAAK,IAAI,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;GACpC,MAAM,MAAM,KAAK;GACjB,MAAM,iBAAiB,KAAK,WAAW;AACvC,OAAI,mBAAmB,KACrB,KAAI,iBAAiB,CAAC,eAAe,KAAK,IAAI,CAC5C,QAAO,QAAO,MAAM;OAEpB,QAAO,MAAM;QAEV;IACL,MAAM,UAAU,qBAAqB,eAAe;AACpD,QAAI,QAAQ,QAAQ,KAAI,KAAK,GAC3B,QAAO,MAAM,MAAM,QAAO,UAAU;aAC3B,QAAQ,QAAQ,IAAI,KAAK,GAClC,QAAO,MAAM,MAAM,OAAO,UAAU;QAGpC,QAAO,MAAM,MAAM,OAAO,QAAQ,QAAQ,MAAM,SAAS,GAAG;;;;CAMpE,SAAS,gBAAgB,MAAa,OAAqB;AACzD,MAAI,CAAC,KAAM;AACX,MAAI,SAAS,sBAAsB;AACjC,OAAI,SAAS,KAAM,wBAAO,IAAI,SAAgB;AAC9C,OAAI,KAAK,IAAI,KAAK,CAChB,OAAM,IAAI,MAAM,4CAA4C;AAE9D,QAAK,IAAI,KAAK;;EAEhB,MAAM,MAAM,KAAK;AACjB,kBAAgB,IAAI;EACpB,MAAM,UAAU,OAAO,OAAO,MAAM;EACpC,MAAM,YAAY,IAAI,WAAW,EAAE;AAGnC,MAAI,KAAK,eAAe,QAAQ,cAAc,KAC5C,wBAAuB,KAAK,WAAW;AAIzC,MAAI,cAAc,UAAU;AAC1B,UAAO,UAAU,MAAM;AACvB,yBAAsB,KAAK;AAC3B,UAAO;AACP;;AAIF,MAAI,cAAc,MAAM;AACtB,UAAO,UAAU,MAAM;AACvB,yBAAsB,MAAM,KAAK;AACjC,UAAO;AACP;;EAGF,MAAM,WAAW,KAAK;EACtB,MAAM,iBAAiB,SAAS;AAGhC,MAAI,YAAY,QAAQ,QAAQ,IAAI,IAAI,EAAE;AACxC,UAAO,UAAU,MAAM;AACvB,yBAAsB,KAAK;AAC3B,UAAO;AACP;;AAIF,MAAI,mBAAmB,GAAG;AACxB,UAAO,UAAU,MAAM;AAC
vB,yBAAsB,KAAK;AAC3B,UAAO;AACP;;EAIF,MAAM,UAAU,gBAAgB,SAAS;EACzC,IAAI,cAAc;AAClB,MAAI;QACG,IAAI,IAAI,GAAG,IAAI,gBAAgB,IAClC,KAAI,OAAO,SAAS,OAAO,UAAU;AACnC,kBAAc;AACd;;;AAON,MAAI,WAAW,CAAC,aAAa;AAC3B,UAAO,UAAU,MAAM;AACvB,yBAAsB,KAAK;AAC3B,UAAO;GACP,IAAI,QAAQ;AACZ,QAAK,IAAI,IAAI,GAAG,IAAI,gBAAgB,KAAK;IACvC,MAAM,OAAQ,SAAS,GAAc,MAAM;AAC3C,QAAI,KAAK,WAAW,EAAG;AACvB,QAAI,CAAC,MAAO,QAAO;AACnB,YAAQ;AACR,WAAO,kBAAkB,KAAK;;AAEhC,UAAO,OAAO,MAAM;AACpB;;AAKF,MAAI,SAAS;GACX,MAAM,eAAe,OAAO,OAAO,QAAQ,EAAE;AAC7C,UAAO,UAAU,MAAM;AACvB,yBAAsB,KAAK;AAC3B,UAAO;AACP,QAAK,IAAI,IAAI,GAAG,IAAI,gBAAgB,KAAK;IACvC,MAAM,QAAQ,SAAS;AACvB,QAAI,OAAO,UAAU,SAEnB,KAAI,MAAM,WAAW,OAAO,CAC1B,QAAO,OAAO,eAAe,kBAAkB,MAAM;SAChD;KACL,MAAM,UAAU,MAAM,MAAM;AAC5B,SAAI,QAAQ,SAAS,EACnB,QAAO,OAAO,eAAe,kBAAkB,QAAQ;;SAGtD;AACL,YAAO;AACP,qBAAgB,OAAO,QAAQ,EAAE;;;AAGrC,UAAO,OAAO,UAAU,OAAO,MAAM;AACrC;;AAIF,SAAO,UAAU,MAAM;AACvB,wBAAsB,KAAK;AAC3B,SAAO;AACP,OAAK,IAAI,IAAI,GAAG,IAAI,gBAAgB,KAAK;GACvC,MAAM,QAAQ,SAAS;AACvB,OAAI,OAAO,UAAU,SAEnB,QAAO,OAAO,OAAO,OAAO,QAAQ,EAAE,GAAG,kBAAkB,MAAM;QAC5D;AACL,WAAO;AACP,oBAAgB,OAAO,QAAQ,EAAE;;;AAGrC,SAAO,OAAO,UAAU,OAAO,MAAM;;CAGvC,MAAM,QAAQ,MAAM,QAAQ,MAAM,GAAG,QAAQ,CAAC,MAAM;AACpD,MAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;EACrC,MAAM,OAAO,MAAM;AACnB,MAAI,IAAI,EAAG,QAAO;AAClB,MAAI,OAAO,SAAS,SAClB,QAAO,kBAAkB,KAAK;MAE9B,iBAAgB,MAAM,EAAE;;AAI5B,QAAO;;;;;;;;;;;;;AAkBT,SAAS,MACP,OAM4B;AAC5B,KAAI,UAAU,QAAQ,UAAU,KAAA,EAAW,QAAO,EAAE;AAGpD,KAAI,CAAC,MAAM,QAAQ,MAAM,IAAI,OAAO,UAAU,YAAY,QAAQ,MAAM,CACtE,QAAO;AAIT,KAAI,CAAC,MAAM,QAAQ,MAAM,IAAI,OAAO,UAAU,SAC5C,QAAO,UAAU,MAAoB;AAIvC,KAAI,CAAC,MAAM,QAAQ,MAAM,CACvB,QAAO,UAAU,MAAoB;CAIvC,MAAM,QAAQ;AACd,KAAI,MAAM,WAAW,EAAG,QAAO,EAAE;CAGjC,IAAI,SAAkB,KAAA;AACtB,MAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,IAChC,KAAI,OAAO,MAAM,OAAO,UAAU;AAChC,WAAS,MAAM;AACf;;AAKJ,KAAI,WAAW,KAAA,EAAW,QAAO;AAGjC,KAAI,OAAO,WAAW,YAAY,WAAW,QAAQ,QAAQ,OAAO,CAClE,QAAO;AAIT,KACE,OAAO,WAAW,YAClB,WAAW,QACX,gBAAgB,OAAO,CAEvB,QAAO,aAAa,MAAyB;AAI/C,QAAO,UAAU,MAAsB;;;AAIzC,SAAS,QAAQ,OAAgC;AAC/C,QACE,OAAO
,UAAU,YACjB,UAAU,QACV,OAAQ,MAAgB,YAAY,YACpC,MAAM,QAAS,MAAgB,SAAS;;;AAK5C,SAAS,gBAAgB,OAAwC;AAC/D,KAAI,OAAO,UAAU,YAAY,UAAU,KAAM,QAAO;CACxD,MAAM,OAAO,OAAO,KAAK,MAAM;AAC/B,KAAI,KAAK,WAAW,EAAG,QAAO;AAE9B,KAAI,WAAW,SAAS,cAAc,SAAS,WAAW,MAAO,QAAO;AAExE,KACE,KAAK,WAAW,KAChB,MAAM,QAAS,MAAkC,KAAK,IAAK,CAE3D,QAAO;AAET,QAAO"}
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import { n as TNode } from "./parser-BfdEfWDg.mjs";
|
|
2
|
+
import { LosslessEntry } from "./converters/lossless.mjs";
|
|
3
|
+
import { LossyValue } from "./converters/lossy.mjs";
|
|
4
|
+
|
|
5
|
+
//#region src/xmlParseStream.d.ts
|
|
6
|
+
/**
|
|
7
|
+
* Options for `XmlParseStream`.
|
|
8
|
+
*
|
|
9
|
+
* Only the fields that the transform layer actually consumes are exposed here.
|
|
10
|
+
* This keeps the API honest — options like `trimWhitespace`, `entities`, and
|
|
11
|
+
* `strict` belong to the synchronous `parse()` function, not the streaming
|
|
12
|
+
* tree-builder.
|
|
13
|
+
*/
|
|
14
|
+
interface XmlParseStreamOptions {
|
|
15
|
+
/**
|
|
16
|
+
* Starting character offset — skip this many leading characters from the input.
|
|
17
|
+
* When a string is passed, its `.length` is used as the offset.
|
|
18
|
+
*/
|
|
19
|
+
offset?: number | string;
|
|
20
|
+
/** Enable HTML parsing mode (sets the defaults for `selfClosingTags` and `rawContentTags`). */
|
|
21
|
+
html?: boolean;
|
|
22
|
+
/**
|
|
23
|
+
* Tag names that are self-closing (void elements).
|
|
24
|
+
* Defaults to standard HTML void elements when `html` is `true`, else `[]`.
|
|
25
|
+
*/
|
|
26
|
+
selfClosingTags?: string[];
|
|
27
|
+
/**
|
|
28
|
+
* Tag names whose content is raw text (not parsed as XML/HTML).
|
|
29
|
+
* Defaults to `["script", "style"]` when `html` is `true`, else `[]`.
|
|
30
|
+
*/
|
|
31
|
+
rawContentTags?: string[];
|
|
32
|
+
/** Keep XML comments in the output. Defaults to `false`. */
|
|
33
|
+
keepComments?: boolean;
|
|
34
|
+
/**
|
|
35
|
+
* Emit only elements matching these tag names instead of waiting for the
|
|
36
|
+
* entire top-level tree to close.
|
|
37
|
+
*
|
|
38
|
+
* When set, each matching element is emitted as a standalone `TNode` subtree
|
|
39
|
+
* the moment its close tag is encountered, regardless of nesting depth.
|
|
40
|
+
* Non-matching ancestor elements are **not** built or emitted — the stream
|
|
41
|
+
* only produces the selected subtrees.
|
|
42
|
+
*
|
|
43
|
+
* When multiple selected tags are nested (e.g. selecting both `item` and
|
|
44
|
+
* `sub` where `<sub>` appears inside `<item>`), each matching element
|
|
45
|
+
* is emitted independently as it closes. The inner element appears both as a
|
|
46
|
+
* separate emission **and** as a child within its ancestor's subtree.
|
|
47
|
+
*
|
|
48
|
+
* Accepts a single tag name or an array of tag names.
|
|
49
|
+
*
|
|
50
|
+
* @example
|
|
51
|
+
* ```ts
|
|
52
|
+
* // Given:
|
|
53
|
+
* // <root>
|
|
54
|
+
* // <item>
|
|
55
|
+
* // <sub>1</sub><box>a</box>
|
|
56
|
+
* // </item>
|
|
57
|
+
* // <item>
|
|
58
|
+
* // <sub>2</sub><box>b</box>
|
|
59
|
+
* // </item>
|
|
60
|
+
* // </root>
|
|
61
|
+
* //
|
|
62
|
+
* // Without select: emits one big <root> TNode after </root>
|
|
63
|
+
* // With select 'item': emits two <item> TNodes, one as each closes
|
|
64
|
+
* const stream = new XmlParseStream({ select: 'item' });
|
|
65
|
+
*
|
|
66
|
+
* // Nested selection: emits each <sub> as it closes, then the
|
|
67
|
+
* // containing <item> (which still includes the <sub> as a child).
|
|
68
|
+
* const stream2 = new XmlParseStream({ select: ['item', 'sub'] });
|
|
69
|
+
* ```
|
|
70
|
+
*/
|
|
71
|
+
select?: string | string[];
|
|
72
|
+
/**
|
|
73
|
+
* Output format for emitted chunks.
|
|
74
|
+
*
|
|
75
|
+
* - `'dom'` (default) — emit raw `TNode | string` values.
|
|
76
|
+
* - `'lossy'` — convert each item to the compact lossy format (`LossyValue`).
|
|
77
|
+
* - `'lossless'` — convert each item to the order-preserving lossless format
|
|
78
|
+
* (`LosslessEntry`).
|
|
79
|
+
*/
|
|
80
|
+
output?: 'dom' | 'lossy' | 'lossless';
|
|
81
|
+
}
|
|
82
|
+
/**
|
|
83
|
+
* A Web Streams `TransformStream` that incrementally parses XML chunks into
|
|
84
|
+
* `TNode` subtrees (or lossy/lossless objects). Works in browsers, Node.js
|
|
85
|
+
* 18+, Deno, and Bun.
|
|
86
|
+
*
|
|
87
|
+
* Follows the platform stream class convention (like `TextDecoderStream`,
|
|
88
|
+
* `DecompressionStream`, etc.) — instantiate with `new` and use with
|
|
89
|
+
* `.pipeThrough()`.
|
|
90
|
+
*
|
|
91
|
+
* Internally powered by the SAX engine with a tree-construction layer
|
|
92
|
+
* that assembles `TNode` subtrees and emits them as they complete.
|
|
93
|
+
*
|
|
94
|
+
* By default, nodes are emitted when a top-level element closes (depth 0).
|
|
95
|
+
* Use the `select` option to emit specific elements as they close at any depth,
|
|
96
|
+
* without waiting for the entire document root to finish.
|
|
97
|
+
*
|
|
98
|
+
* Use the `output` option to choose the emitted format:
|
|
99
|
+
* - `'dom'` (default) — raw `TNode | string`
|
|
100
|
+
* - `'lossy'` — compact lossy objects (`LossyValue`)
|
|
101
|
+
* - `'lossless'` — order-preserving objects (`LosslessEntry`)
|
|
102
|
+
*
|
|
103
|
+
* @example
|
|
104
|
+
* ```ts
|
|
105
|
+
* import { XmlParseStream } from '@eksml/xml/stream';
|
|
106
|
+
*
|
|
107
|
+
* const response = await fetch('/feed.xml');
|
|
108
|
+
* const reader = response.body
|
|
109
|
+
* .pipeThrough(new TextDecoderStream())
|
|
110
|
+
* .pipeThrough(new XmlParseStream())
|
|
111
|
+
* .getReader();
|
|
112
|
+
*
|
|
113
|
+
* while (true) {
|
|
114
|
+
* const { done, value } = await reader.read();
|
|
115
|
+
* if (done) break;
|
|
116
|
+
* console.log(value); // TNode or string
|
|
117
|
+
* }
|
|
118
|
+
* ```
|
|
119
|
+
*/
|
|
120
|
+
declare class XmlParseStream<TOutput = TNode | string> extends TransformStream<string, TOutput> {
|
|
121
|
+
/** Default DOM output (`TNode | string`). */
|
|
122
|
+
constructor(options?: XmlParseStreamOptions & {
|
|
123
|
+
output?: 'dom';
|
|
124
|
+
});
|
|
125
|
+
/** Lossy output — each item is converted to `LossyValue`. */
|
|
126
|
+
constructor(options: XmlParseStreamOptions & {
|
|
127
|
+
output: 'lossy';
|
|
128
|
+
});
|
|
129
|
+
/** Lossless output — each item is converted to `LosslessEntry`. */
|
|
130
|
+
constructor(options: XmlParseStreamOptions & {
|
|
131
|
+
output: 'lossless';
|
|
132
|
+
});
|
|
133
|
+
/** Dynamic output — when the `output` option is not a literal type, the output type is the widest type. */
|
|
134
|
+
constructor(options: XmlParseStreamOptions);
|
|
135
|
+
}
|
|
136
|
+
//#endregion
|
|
137
|
+
export { type LosslessEntry, type LossyValue, XmlParseStream, XmlParseStreamOptions };
|
|
138
|
+
//# sourceMappingURL=xmlParseStream.d.mts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"xmlParseStream.d.mts","names":[],"sources":["../src/xmlParseStream.ts"],"mappings":";;;;;;;AAwBA;;;;;;UAAiB,qBAAA;EAiBf;;;;EAZA,MAAA;EA6DM;EA3DN,IAAA;EA6PyB;;;;EAxPzB,eAAA;EA+PqB;;;;EA1PrB,cAAA;EAmP2E;EAjP3E,YAAA;EAiPoC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EA3MpC,MAAA;;;;;;;;;EASA,MAAA;AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;cAkMW,cAAA,WAAyB,KAAA,mBAAwB,eAAA,SAE5D,OAAA;;cAGY,OAAA,GAAU,qBAAA;IAA0B,MAAA;EAAA;;cAEpC,OAAA,EAAS,qBAAA;IAA0B,MAAA;EAAA;;cAEnC,OAAA,EAAS,qBAAA;IAA0B,MAAA;EAAA;;cAEnC,OAAA,EAAS,qBAAA;AAAA"}
|