@eksml/xml 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +588 -0
- package/dist/converters/fromLossless.d.mts +14 -0
- package/dist/converters/fromLossless.d.mts.map +1 -0
- package/dist/converters/fromLossless.mjs +35 -0
- package/dist/converters/fromLossless.mjs.map +1 -0
- package/dist/converters/fromLossy.d.mts +18 -0
- package/dist/converters/fromLossy.d.mts.map +1 -0
- package/dist/converters/fromLossy.mjs +91 -0
- package/dist/converters/fromLossy.mjs.map +1 -0
- package/dist/converters/lossless.d.mts +39 -0
- package/dist/converters/lossless.d.mts.map +1 -0
- package/dist/converters/lossless.mjs +74 -0
- package/dist/converters/lossless.mjs.map +1 -0
- package/dist/converters/lossy.d.mts +42 -0
- package/dist/converters/lossy.d.mts.map +1 -0
- package/dist/converters/lossy.mjs +158 -0
- package/dist/converters/lossy.mjs.map +1 -0
- package/dist/htmlConstants-D6fsKbZ-.mjs +30 -0
- package/dist/htmlConstants-D6fsKbZ-.mjs.map +1 -0
- package/dist/parser-BfdEfWDg.d.mts +95 -0
- package/dist/parser-BfdEfWDg.d.mts.map +1 -0
- package/dist/parser-CYq309aR.mjs +479 -0
- package/dist/parser-CYq309aR.mjs.map +1 -0
- package/dist/parser.d.mts +2 -0
- package/dist/parser.mjs +2 -0
- package/dist/sax.d.mts +64 -0
- package/dist/sax.d.mts.map +1 -0
- package/dist/sax.mjs +70 -0
- package/dist/sax.mjs.map +1 -0
- package/dist/saxEngine-BDnD7ruG.mjs +750 -0
- package/dist/saxEngine-BDnD7ruG.mjs.map +1 -0
- package/dist/utilities/index.d.mts +88 -0
- package/dist/utilities/index.d.mts.map +1 -0
- package/dist/utilities/index.mjs +87 -0
- package/dist/utilities/index.mjs.map +1 -0
- package/dist/writer.d.mts +58 -0
- package/dist/writer.d.mts.map +1 -0
- package/dist/writer.mjs +357 -0
- package/dist/writer.mjs.map +1 -0
- package/dist/xmlParseStream.d.mts +138 -0
- package/dist/xmlParseStream.d.mts.map +1 -0
- package/dist/xmlParseStream.mjs +313 -0
- package/dist/xmlParseStream.mjs.map +1 -0
- package/package.json +100 -0
- package/src/converters/fromLossless.ts +80 -0
- package/src/converters/fromLossy.ts +180 -0
- package/src/converters/lossless.ts +116 -0
- package/src/converters/lossy.ts +274 -0
- package/src/parser.ts +728 -0
- package/src/sax.ts +157 -0
- package/src/saxEngine.ts +1157 -0
- package/src/utilities/escapeRegExp.ts +19 -0
- package/src/utilities/filter.ts +63 -0
- package/src/utilities/getElementById.ts +21 -0
- package/src/utilities/getElementsByClassName.ts +22 -0
- package/src/utilities/htmlConstants.ts +26 -0
- package/src/utilities/index.ts +7 -0
- package/src/utilities/isElementNode.ts +19 -0
- package/src/utilities/isTextNode.ts +19 -0
- package/src/utilities/toContentString.ts +23 -0
- package/src/writer.ts +650 -0
- package/src/xmlParseStream.ts +597 -0
package/src/writer.ts
ADDED
|
@@ -0,0 +1,650 @@
|
|
|
1
|
+
import { escapeText, escapeAttribute, escapeUTF8 } from 'entities';
|
|
2
|
+
import type { TNode } from '#src/parser.ts';
|
|
3
|
+
import type { LossyValue } from '#src/converters/lossy.ts';
|
|
4
|
+
import type { LosslessEntry } from '#src/converters/lossless.ts';
|
|
5
|
+
import { fromLossy } from '#src/converters/fromLossy.ts';
|
|
6
|
+
import { fromLossless } from '#src/converters/fromLossless.ts';
|
|
7
|
+
import { HTML_VOID_ELEMENTS } from '#src/utilities/htmlConstants.ts';
|
|
8
|
+
// @generated:char-codes:begin
const BANG = 33; // !
const QUESTION = 63; // ?
// @generated:char-codes:end

/**
 * Matches any character that must never appear inside a tag name or an
 * attribute name: the structural delimiters `<`, `>` and `=`, either quote
 * character, and any whitespace. Interpolating such a character unchecked
 * would break the well-formedness of the generated markup.
 */
const INVALID_NAME_CHARS = /[<>=\s"']/;

/**
 * Reject tag names that cannot be written out safely.
 *
 * A single leading `?` (processing instruction) or `!` (declaration such as
 * DOCTYPE) is tolerated and excluded from the character check; everything
 * after it must be free of the characters in `INVALID_NAME_CHARS`.
 *
 * @param tag - The tag name exactly as stored on the node
 * @throws Error if `tag` is empty or contains a forbidden character
 */
function validateTagName(tag: string): void {
  if (tag.length === 0) {
    throw new Error('Invalid tag name: tag name must not be empty');
  }

  const lead = tag.charCodeAt(0);
  const hasMarker = lead === QUESTION || lead === BANG;
  const bareName = hasMarker ? tag.substring(1) : tag;

  // A lone `?` or `!` leaves nothing to inspect and is let through as-is.
  if (bareName.length > 0 && INVALID_NAME_CHARS.test(bareName)) {
    throw new Error(`Invalid tag name: "${tag}" contains forbidden characters`);
  }
}
|
|
34
|
+
|
|
35
|
+
/**
 * Reject attribute maps that contain a key unusable as an attribute name.
 *
 * Only the object's own enumerable keys are inspected; values are ignored.
 *
 * @param attributes - Attribute map whose keys are checked
 * @throws Error on the first key that is empty or matches `INVALID_NAME_CHARS`
 */
function validateAttributeNames(attributes: Record<string, unknown>): void {
  for (const name of Object.keys(attributes)) {
    const usable = name.length > 0 && !INVALID_NAME_CHARS.test(name);
    if (usable) continue;
    throw new Error(
      `Invalid attribute name: "${name}" contains forbidden characters`,
    );
  }
}
|
|
46
|
+
|
|
47
|
+
/**
 * Recursion depth at which the circular-reference `WeakSet` guard kicks in.
 *
 * The writer functions compare their `depth` argument against this value:
 * nodes visited at a shallower depth are never recorded, and the `WeakSet`
 * itself is only allocated the first time this depth is reached, so shallow
 * documents pay no bookkeeping cost. A cycle is therefore only reported once
 * recursion has descended this far.
 *
 * NOTE(review): the original author's rationale is that typical XML documents
 * are shallower than 10 levels and that the call-stack limit acts as the
 * safety net below this depth — not verifiable from this file.
 */
const CIRCULAR_CHECK_DEPTH = 16;
|
|
53
|
+
|
|
54
|
+
/** Options for write. */
export interface WriterOptions {
  /**
   * Pretty-print with indentation. When enabled, each element is placed on
   * its own line with proper nesting indentation.
   *
   * - `true` uses two spaces as the indent.
   * - A non-empty string value is used as the indent directly (e.g. `"\t"`,
   *   `"    "`). An empty string is falsy and leaves pretty printing off.
   *
   * Elements without children are self-closed (`<tag/>`).
   * Text-only elements are kept inline: `<name>Alice</name>`; each text child
   * is trimmed and the non-empty pieces are joined with a single space.
   * Mixed content (text interleaved with elements) is split up: every
   * non-empty trimmed text child and every element child is written on its
   * own indented line, so surrounding whitespace is not preserved.
   */
  pretty?: boolean | string;
  /**
   * Encode special characters in text content and attribute values as
   * entities. Strings that start with `<!--` (comments) are always written
   * verbatim.
   *
   * In XML mode (default) text is passed through `escapeText` and attribute
   * values through `escapeAttribute`; in HTML mode (`html: true`) both are
   * passed through `escapeUTF8`. All three come from the `entities` package.
   *
   * NOTE(review): earlier wording promised that `&`, `<`, `>` are encoded in
   * text, that `&`, `"`, `'` are encoded in attributes, and that HTML mode
   * uses the full HTML named entity set (e.g. `&copy;`, `&eacute;`). Confirm
   * each of these against the documented behavior of the `entities` helpers.
   *
   * Defaults to `false` — text and attribute values are written verbatim,
   * apart from the quote handling needed to delimit attribute values.
   */
  entities?: boolean;
  /**
   * Enable HTML mode. When enabled:
   * - Void elements (`<br>`, `<img>`, `<hr>`, etc., as listed in
   *   `HTML_VOID_ELEMENTS`) are written without a closing tag and without
   *   their children (e.g. `<br>` instead of `<br></br>` or `<br/>`).
   * - When `entities` is also `true`, encoding switches to `escapeUTF8`.
   *   NOTE(review): earlier wording described this as using HTML named
   *   entities (e.g. `&copy;` instead of `&#169;`) — confirm.
   */
  html?: boolean;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Input types accepted by `write()`: a single DOM node, an array of DOM nodes
 * and text strings, a lossy value or an array of lossy values, or an array of
 * lossless entries.
 */
export type WriterInput =
  | TNode
  | (TNode | string)[]
  | LossyValue
  | LossyValue[]
  | LosslessEntry[];
|
|
94
|
+
|
|
95
|
+
/**
 * Turn a parsed DOM, a lossy value, or a list of lossless entries back into
 * markup.
 *
 * The shape of `input` is detected automatically by `toDom()`:
 * - `TNode` or `(TNode | string)[]` — already a DOM tree, written as-is
 * - `LossyValue` or `LossyValue[]` — converted through `fromLossy()` first
 * - `LosslessEntry[]` — converted through `fromLossless()` first
 *
 * Any falsy `input` (for example `null`, `undefined` or `''`) produces an
 * empty string. When no option is switched on, the dedicated compact writer
 * is used; otherwise the full-featured writer handles the request.
 *
 * @param input - The node(s), lossy value(s), or lossless entries to serialize
 * @param options - Formatting options
 * @returns The serialized markup
 */
export function write(
  input: TNode | (TNode | string)[],
  options?: WriterOptions,
): string;
export function write(
  input: LossyValue | LossyValue[],
  options?: WriterOptions,
): string;
export function write(input: LosslessEntry[], options?: WriterOptions): string;
export function write(input: WriterInput, options?: WriterOptions): string;
export function write(input: WriterInput, options?: WriterOptions): string {
  if (!input) return '';

  const dom = toDom(input);

  // Only pay for option parsing, encoder selection and the void-element set
  // when at least one feature was actually requested.
  if (options && (options.pretty || options.entities || options.html)) {
    return fullWriter(dom, options);
  }

  return compactWrite(dom);
}
|
|
130
|
+
|
|
131
|
+
// ---------------------------------------------------------------------------
|
|
132
|
+
// Fast path — compact output, no entities, no HTML mode
|
|
133
|
+
// ---------------------------------------------------------------------------
|
|
134
|
+
|
|
135
|
+
/**
 * Test whether a string is a simple keyword that can appear unquoted in a
 * DOCTYPE declaration (e.g. `html`, `PUBLIC`, `SYSTEM`): an ASCII letter
 * followed by any number of ASCII letters, digits, `_` or `-`.
 * The writers wrap any value-less declaration key that does not match
 * (URIs, public identifiers) in double quotes.
 */
const SIMPLE_KEYWORD = /^[A-Za-z][A-Za-z0-9_-]*$/;
|
|
141
|
+
|
|
142
|
+
/**
 * Serialize a DOM tree to compact markup: no entity encoding, no HTML mode
 * and no pretty printing.
 *
 * Text children are appended verbatim. Attribute values are double-quoted
 * unless they contain a double quote, in which case single quotes are used;
 * a value containing both quote kinds is single-quoted with every `'`
 * rewritten to `&apos;`. Attributes whose value is `null` are written as a
 * bare name (inside a `!` declaration, names that are not simple keywords
 * are double-quoted). Tags starting with `?` are closed with `?>` and tags
 * starting with `!` with `>`; neither writes children or a closing tag.
 *
 * @param input - A single node, or an array of nodes and text strings
 * @returns The serialized markup
 * @throws Error if a tag or attribute name fails validation, or if a node at
 *   depth `CIRCULAR_CHECK_DEPTH` or deeper is one of its own ancestors
 */
function compactWrite(input: TNode | (TNode | string)[]): string {
  let out = '';
  // Nodes currently being written at depth >= CIRCULAR_CHECK_DEPTH.
  // Allocated on first use so shallow trees never pay for it.
  let seen: WeakSet<TNode> | null = null;

  function writeChildren(nodes: (TNode | string)[], depth: number): void {
    for (let i = 0; i < nodes.length; i++) {
      const node = nodes[i]!;
      if (typeof node === 'string') {
        out += node;
      } else {
        writeNode(node, depth);
      }
    }
  }

  /** Write one node, guarding deep recursion against reference cycles. */
  function writeNode(node: TNode, depth: number): void {
    if (depth < CIRCULAR_CHECK_DEPTH) {
      writeElement(node, depth);
      return;
    }
    if (seen === null) seen = new WeakSet<TNode>();
    const open = seen;
    if (open.has(node)) {
      throw new Error('Circular reference detected in TNode tree');
    }
    open.add(node);
    writeElement(node, depth);
    // The node is finished: forget it, so the same object legitimately
    // reused elsewhere in the tree is not mistaken for a cycle.
    open.delete(node);
  }

  /** Write the open tag, attributes, children and close tag of one node. */
  function writeElement(node: TNode, depth: number): void {
    const tag = node.tagName;
    validateTagName(tag);
    const attributes = node.attributes;
    const firstChar = tag.charCodeAt(0);
    // Skip attribute name validation for declarations (!DOCTYPE etc.)
    // where keys are quoted identifiers, not XML attribute names
    if (attributes !== null && firstChar !== BANG) {
      validateAttributeNames(attributes);
    }
    if (attributes === null) {
      // No attributes — combine open tag into one concat
      if (firstChar === QUESTION) {
        out += '<' + tag + '?>';
        return;
      }
      out += '<' + tag + '>';
      if (firstChar === BANG) return;
    } else {
      out += '<' + tag;
      const isDeclaration = firstChar === BANG;
      const keys = Object.keys(attributes);
      for (let j = 0; j < keys.length; j++) {
        const attributeName = keys[j]!;
        const attributeValue = attributes[attributeName];
        if (attributeValue === null) {
          if (isDeclaration && !SIMPLE_KEYWORD.test(attributeName)) {
            out += ' "' + attributeName + '"';
          } else {
            out += ' ' + attributeName;
          }
        } else if (attributeValue.indexOf('"') === -1) {
          out += ' ' + attributeName + '="' + attributeValue + '"';
        } else if (attributeValue.indexOf("'") === -1) {
          out += ' ' + attributeName + "='" + attributeValue + "'";
        } else {
          // Value contains both quote types — escape single quotes
          out +=
            ' ' +
            attributeName +
            "='" +
            attributeValue.replace(/'/g, '&apos;') +
            "'";
        }
      }
      if (firstChar === QUESTION) {
        out += '?>';
        return;
      }
      out += '>';
      if (firstChar === BANG) return;
    }
    writeChildren(node.children, depth + 1);
    out += '</' + tag + '>';
  }

  writeChildren(Array.isArray(input) ? input : [input], 0);
  return out;
}
|
|
229
|
+
|
|
230
|
+
// ---------------------------------------------------------------------------
|
|
231
|
+
// Full-featured path — entities, HTML mode, and/or pretty printing
|
|
232
|
+
// ---------------------------------------------------------------------------
|
|
233
|
+
|
|
234
|
+
/**
 * Serialize a DOM tree with optional entity encoding, HTML void-element
 * handling and pretty printing.
 *
 * - `options.pretty`: `true` indents with two spaces, a non-empty string is
 *   used as the indent unit, anything falsy selects compact output.
 * - `options.entities`: text and attribute values are passed through the
 *   `entities` package helpers (`escapeText` / `escapeAttribute`, or
 *   `escapeUTF8` for both when `options.html` is set). Strings starting with
 *   `<!--` are treated as comments and never encoded.
 * - `options.html`: tags listed in `HTML_VOID_ELEMENTS` are written without
 *   children and without a closing tag.
 *
 * NOTE(review): exactly which characters each `entities` helper encodes is
 * defined by that package — confirm against its documentation.
 *
 * @param input - A single node, or an array of nodes and text strings
 * @param options - Formatting options; see `WriterOptions`
 * @returns The serialized markup
 * @throws Error if a tag or attribute name fails validation, or if a node at
 *   depth `CIRCULAR_CHECK_DEPTH` or deeper is one of its own ancestors
 */
function fullWriter(
  input: TNode | (TNode | string)[],
  options: WriterOptions,
): string {
  const indent = !options.pretty
    ? ''
    : typeof options.pretty === 'string'
      ? options.pretty
      : '  ';

  // Entity encoding functions — identity when disabled
  const encodeEntities = !!options.entities;
  const htmlMode = !!options.html;
  const encodeRawText: (text: string) => string = encodeEntities
    ? htmlMode
      ? escapeUTF8
      : escapeText
    : (text) => text;
  /** Encode text content, but pass comments through verbatim. */
  const encodeTextContent = (text: string): string =>
    text.startsWith('<!--') ? text : encodeRawText(text);
  const encodeAttributeValue: (value: string) => string = encodeEntities
    ? htmlMode
      ? escapeUTF8
      : escapeAttribute
    : (value) => value;

  // HTML void elements — written without </tag> in html mode
  const voidSet: Set<string> | null = htmlMode
    ? new Set(HTML_VOID_ELEMENTS)
    : null;

  let out = '';
  // Nodes currently being written at depth >= CIRCULAR_CHECK_DEPTH.
  // Allocated on first use so shallow trees never pay for it.
  let seen: WeakSet<TNode> | null = null;

  /** Run `emit` for `node`, guarding deep recursion against cycles. */
  function guardCycles(
    node: TNode,
    depth: number,
    emit: (node: TNode, depth: number) => void,
  ): void {
    if (depth < CIRCULAR_CHECK_DEPTH) {
      emit(node, depth);
      return;
    }
    if (seen === null) seen = new WeakSet<TNode>();
    const open = seen;
    if (open.has(node)) {
      throw new Error('Circular reference detected in TNode tree');
    }
    open.add(node);
    emit(node, depth);
    // The node is finished: forget it, so the same object legitimately
    // reused elsewhere in the tree is not mistaken for a cycle.
    open.delete(node);
  }

  /** Append ` name="value"` pairs (or bare names for `null` values). */
  function writeAttributes(node: TNode, isDeclaration: boolean): void {
    const attributes = node.attributes;
    if (attributes === null) return;
    const keys = Object.keys(attributes);
    for (let j = 0; j < keys.length; j++) {
      const key = keys[j]!;
      const attributeValue = attributes[key];
      if (attributeValue === null) {
        // Declaration keys that are not plain keywords are quoted literals
        if (isDeclaration && !SIMPLE_KEYWORD.test(key)) {
          out += ' "' + key + '"';
        } else {
          out += ' ' + key;
        }
        continue;
      }
      const encoded = encodeAttributeValue(attributeValue);
      if (encoded.indexOf('"') === -1) {
        out += ' ' + key + '="' + encoded + '"';
      } else if (encoded.indexOf("'") === -1) {
        out += ' ' + key + "='" + encoded + "'";
      } else {
        // Value contains both quote types — escape single quotes
        out += ' ' + key + "='" + encoded.replace(/'/g, '&apos;') + "'";
      }
    }
  }

  // -- Compact output with entities/html support ---------------------------

  function writeCompactChildren(
    nodes: (TNode | string)[],
    depth: number,
  ): void {
    for (let i = 0; i < nodes.length; i++) {
      const node = nodes[i]!;
      if (typeof node === 'string') {
        out += encodeTextContent(node);
      } else {
        guardCycles(node, depth, writeCompactElement);
      }
    }
  }

  function writeCompactElement(node: TNode, depth: number): void {
    const tag = node.tagName;
    validateTagName(tag);
    const firstChar = tag.charCodeAt(0);
    const isDeclaration = firstChar === BANG;
    // Skip attribute name validation for declarations (!DOCTYPE etc.)
    // where keys are quoted identifiers, not XML attribute names
    if (node.attributes !== null && !isDeclaration) {
      validateAttributeNames(node.attributes);
    }

    out += '<' + tag;
    writeAttributes(node, isDeclaration);

    // Processing instruction
    if (firstChar === QUESTION) {
      out += '?>';
      return;
    }
    out += '>';
    // Declarations are void in all modes; HTML void elements in html mode
    if (isDeclaration || (voidSet !== null && voidSet.has(tag))) return;

    writeCompactChildren(node.children, depth + 1);
    out += '</' + tag + '>';
  }

  // -- Pretty output ---------------------------------------------------------

  /** True when any child is text other than a comment. */
  function hasTextChildren(nodes: (TNode | string)[]): boolean {
    for (let i = 0; i < nodes.length; i++) {
      const node = nodes[i];
      // Comments are structural (like elements) — they don't force inline mode
      if (typeof node === 'string' && !node.startsWith('<!--')) return true;
    }
    return false;
  }

  function writePrettyNode(node: TNode, depth: number): void {
    if (!node) return;
    guardCycles(node, depth, writePrettyElement);
  }

  function writePrettyElement(node: TNode, depth: number): void {
    const tag = node.tagName;
    validateTagName(tag);
    const padding = indent.repeat(depth);
    const firstChar = tag.charCodeAt(0);
    const isDeclaration = firstChar === BANG;
    // Skip attribute name validation for declarations (!DOCTYPE etc.)
    // where keys are quoted identifiers, not XML attribute names
    if (node.attributes !== null && !isDeclaration) {
      validateAttributeNames(node.attributes);
    }

    // Every form starts with the padded open tag and its attributes
    out += padding + '<' + tag;
    writeAttributes(node, isDeclaration);

    // Processing instruction
    if (firstChar === QUESTION) {
      out += '?>';
      return;
    }

    // Declaration tags (e.g. !DOCTYPE) — void in all modes
    if (isDeclaration) {
      out += '>';
      return;
    }

    const children = node.children;
    const childrenLength = children.length;

    // HTML void elements — no children, no closing tag
    if (voidSet !== null && voidSet.has(tag)) {
      out += '>';
      return;
    }

    // Empty element — self-close
    if (childrenLength === 0) {
      out += '/>';
      return;
    }

    out += '>';

    // Classify children
    const hasText = hasTextChildren(children);
    const hasElements =
      hasText && children.some((child) => typeof child !== 'string');

    // Text-only (no element children) — trim each text child, join with
    // a single space, and write inline on one line.
    if (hasText && !hasElements) {
      let first = true;
      for (let i = 0; i < childrenLength; i++) {
        const text = (children[i] as string).trim();
        if (text.length === 0) continue;
        if (!first) out += ' ';
        first = false;
        out += encodeTextContent(text);
      }
      out += '</' + tag + '>';
      return;
    }

    // Mixed content or element-only children — every element, every comment
    // and every non-empty trimmed text child gets its own indented line.
    const childPadding = indent.repeat(depth + 1);
    for (let i = 0; i < childrenLength; i++) {
      const child = children[i]!;
      if (typeof child !== 'string') {
        out += '\n';
        writePrettyNode(child, depth + 1);
        continue;
      }
      // Comments pass through without trimming
      const text = child.startsWith('<!--') ? child : child.trim();
      if (text.length > 0) {
        out += '\n' + childPadding + encodeTextContent(text);
      }
    }
    out += '\n' + padding + '</' + tag + '>';
  }

  // -- Entry -------------------------------------------------------------------

  const roots = Array.isArray(input) ? input : [input];

  if (!indent) {
    writeCompactChildren(roots, 0);
    return out;
  }

  for (let i = 0; i < roots.length; i++) {
    const root = roots[i]!;
    if (i > 0) out += '\n';
    if (typeof root === 'string') {
      out += encodeTextContent(root);
    } else {
      writePrettyNode(root, 0);
    }
  }

  return out;
}
|
|
550
|
+
|
|
551
|
+
// ---------------------------------------------------------------------------
|
|
552
|
+
// Input format detection and conversion
|
|
553
|
+
// ---------------------------------------------------------------------------
|
|
554
|
+
|
|
555
|
+
/**
 * Work out which of the supported formats `input` is in and hand back a DOM
 * value (`TNode` or `(TNode | string)[]`) that the writer functions accept.
 *
 * Decision order:
 * 1. `null` / `undefined` → empty array
 * 2. A non-array value recognized by `isTNode()` → returned unchanged
 * 3. Any other non-array value (bare string, plain object, …) → `fromLossy()`
 * 4. Empty array → empty array
 * 5. Array in which no non-string element is found → returned unchanged
 * 6. Array whose first non-string element is a TNode → returned unchanged
 * 7. Array whose first non-string element passes `isLosslessEntry()`
 *    → `fromLossless()`
 * 8. Every other array → `fromLossy()`
 */
function toDom(
  input:
    | TNode
    | (TNode | string)[]
    | LossyValue
    | LossyValue[]
    | LosslessEntry[],
): TNode | (TNode | string)[] {
  if (input === null || input === undefined) return [];

  // Anything that is not an array is either a lone TNode or a lossy value
  if (!Array.isArray(input)) {
    if (isTNode(input)) return input as TNode;
    return fromLossy(input as LossyValue);
  }

  const entries = input as unknown[];
  if (entries.length === 0) return [];

  // The first non-string element decides how the whole array is interpreted
  const probe = entries.find((entry) => typeof entry !== 'string');

  // Nothing to inspect → text-only top level, already valid DOM
  if (probe === undefined) return entries as string[];

  if (isTNode(probe)) return entries as (TNode | string)[];

  if (isLosslessEntry(probe)) return fromLossless(entries as LosslessEntry[]);

  return fromLossy(entries as LossyValue[]);
}
|
|
624
|
+
|
|
625
|
+
/**
 * Structural test for a DOM node: a non-null object whose `tagName` is a
 * string and whose `children` is an array. No other property is inspected.
 */
function isTNode(value: unknown): value is TNode {
  if (value === null || typeof value !== 'object') return false;
  const candidate = value as { tagName?: unknown; children?: unknown };
  return (
    typeof candidate.tagName === 'string' && Array.isArray(candidate.children)
  );
}
|
|
634
|
+
|
|
635
|
+
/**
 * Structural test for a lossless entry. A non-null object with at least one
 * own key qualifies when it either exposes one of the marker properties
 * `$text`, `$comment` or `$attr`, or has exactly one own key whose value is
 * an array (an element entry).
 */
function isLosslessEntry(value: unknown): value is LosslessEntry {
  if (value === null || typeof value !== 'object') return false;

  const record = value as Record<string, unknown>;
  const ownKeys = Object.keys(record);
  if (ownKeys.length === 0) return false;

  // Known lossless marker keys
  const hasMarker = ['$text', '$comment', '$attr'].some(
    (marker) => marker in record,
  );
  if (hasMarker) return true;

  // Single key mapping to an array → element entry
  return ownKeys.length === 1 && Array.isArray(record[ownKeys[0]!]);
}
|