@eksml/xml 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +588 -0
- package/dist/converters/fromLossless.d.mts +14 -0
- package/dist/converters/fromLossless.d.mts.map +1 -0
- package/dist/converters/fromLossless.mjs +35 -0
- package/dist/converters/fromLossless.mjs.map +1 -0
- package/dist/converters/fromLossy.d.mts +18 -0
- package/dist/converters/fromLossy.d.mts.map +1 -0
- package/dist/converters/fromLossy.mjs +91 -0
- package/dist/converters/fromLossy.mjs.map +1 -0
- package/dist/converters/lossless.d.mts +39 -0
- package/dist/converters/lossless.d.mts.map +1 -0
- package/dist/converters/lossless.mjs +74 -0
- package/dist/converters/lossless.mjs.map +1 -0
- package/dist/converters/lossy.d.mts +42 -0
- package/dist/converters/lossy.d.mts.map +1 -0
- package/dist/converters/lossy.mjs +158 -0
- package/dist/converters/lossy.mjs.map +1 -0
- package/dist/htmlConstants-D6fsKbZ-.mjs +30 -0
- package/dist/htmlConstants-D6fsKbZ-.mjs.map +1 -0
- package/dist/parser-BfdEfWDg.d.mts +95 -0
- package/dist/parser-BfdEfWDg.d.mts.map +1 -0
- package/dist/parser-CYq309aR.mjs +479 -0
- package/dist/parser-CYq309aR.mjs.map +1 -0
- package/dist/parser.d.mts +2 -0
- package/dist/parser.mjs +2 -0
- package/dist/sax.d.mts +64 -0
- package/dist/sax.d.mts.map +1 -0
- package/dist/sax.mjs +70 -0
- package/dist/sax.mjs.map +1 -0
- package/dist/saxEngine-BDnD7ruG.mjs +750 -0
- package/dist/saxEngine-BDnD7ruG.mjs.map +1 -0
- package/dist/utilities/index.d.mts +88 -0
- package/dist/utilities/index.d.mts.map +1 -0
- package/dist/utilities/index.mjs +87 -0
- package/dist/utilities/index.mjs.map +1 -0
- package/dist/writer.d.mts +58 -0
- package/dist/writer.d.mts.map +1 -0
- package/dist/writer.mjs +357 -0
- package/dist/writer.mjs.map +1 -0
- package/dist/xmlParseStream.d.mts +138 -0
- package/dist/xmlParseStream.d.mts.map +1 -0
- package/dist/xmlParseStream.mjs +313 -0
- package/dist/xmlParseStream.mjs.map +1 -0
- package/package.json +100 -0
- package/src/converters/fromLossless.ts +80 -0
- package/src/converters/fromLossy.ts +180 -0
- package/src/converters/lossless.ts +116 -0
- package/src/converters/lossy.ts +274 -0
- package/src/parser.ts +728 -0
- package/src/sax.ts +157 -0
- package/src/saxEngine.ts +1157 -0
- package/src/utilities/escapeRegExp.ts +19 -0
- package/src/utilities/filter.ts +63 -0
- package/src/utilities/getElementById.ts +21 -0
- package/src/utilities/getElementsByClassName.ts +22 -0
- package/src/utilities/htmlConstants.ts +26 -0
- package/src/utilities/index.ts +7 -0
- package/src/utilities/isElementNode.ts +19 -0
- package/src/utilities/isTextNode.ts +19 -0
- package/src/utilities/toContentString.ts +23 -0
- package/src/writer.ts +650 -0
- package/src/xmlParseStream.ts +597 -0
|
@@ -0,0 +1,479 @@
|
|
|
1
|
+
import { n as HTML_VOID_ELEMENTS, t as HTML_RAW_CONTENT_TAGS } from "./htmlConstants-D6fsKbZ-.mjs";
|
|
2
|
+
import { decodeHTML, decodeXML } from "entities";
|
|
3
|
+
//#region src/utilities/escapeRegExp.ts
|
|
4
|
+
/**
|
|
5
|
+
* Escapes special regex characters in a string so it can be safely
|
|
6
|
+
* interpolated into a `new RegExp(...)` pattern.
|
|
7
|
+
*
|
|
8
|
+
* Uses the native `RegExp.escape` when available (Node ≥ 24, Chrome ≥ 136,
|
|
9
|
+
* Firefox ≥ 134, Safari ≥ 18.2), otherwise falls back to a manual replacement.
|
|
10
|
+
* @internal
|
|
11
|
+
*/
|
|
12
|
+
// Resolve the escaping strategy once at module load: use the native
// `RegExp.escape` when the runtime provides it, otherwise fall back to
// backslash-escaping the regex metacharacters by hand.
const escapeRegExp = (() => {
  if (typeof RegExp.escape === "function") {
    return RegExp.escape;
  }
  const specialCharacters = /[.*+?^${}()|[\]\\]/g;
  return (text) => text.replace(specialCharacters, "\\$&");
})();
|
|
13
|
+
//#endregion
|
|
14
|
+
//#region src/utilities/filter.ts
|
|
15
|
+
/**
 * Collect every element node, at any depth, for which `predicate` returns a
 * truthy value. Nodes are visited depth-first in document order, and a
 * matching parent is emitted before its matching descendants.
 *
 * @param input - XML string (parsed with `parse`) or an array of nodes/strings
 * @param predicate - Called as `predicate(node, index, depth, path)`, where
 *   `index` is the node's position among its siblings and `path` is the
 *   dot-separated `index.tagName` chain of its ancestors ("" at the top level)
 * @param depth - Depth reported for the top-level nodes (internal use)
 * @param path - Path reported for the top-level nodes (internal use)
 * @returns Flat array of the matching nodes
 */
function filter(input, predicate, depth = 0, path = "") {
  const out = [];
  const nodes = typeof input === "string" ? parse(input) : input;
  filterInto(out, nodes, predicate, depth, path);
  return out;
}

/**
 * Recursive worker for `filter`: appends matches found in `children` (and
 * their descendants) to `out`.
 */
function filterInto(out, children, predicate, depth, path) {
  for (let i = 0; i < children.length; i++) {
    const child = children[i];
    // Text children are strings; `null` is also skipped because
    // `typeof null === "object"` and it has no properties to inspect.
    if (typeof child !== "object" || child === null) continue;
    if (predicate(child, i, depth, path)) out.push(child);
    if (child.children) {
      const prefix = path ? `${path}.` : "";
      filterInto(out, child.children, predicate, depth + 1, `${prefix}${i}.${child.tagName}`);
    }
  }
}
|
|
37
|
+
//#endregion
|
|
38
|
+
//#region src/parser.ts
|
|
39
|
+
// Character codes used by the parser (compared against String#charCodeAt).
const LT = 60; // <
const GT = 62; // >
const SLASH = 47; // /
const BANG = 33; // !
const QUESTION = 63; // ?
const LBRACKET = 91; // [
const RBRACKET = 93; // ]
const SQUOTE = 39; // '
const DQUOTE = 34; // "
const DASH = 45; // -
const UNDERSCORE = 95; // _
const COLON = 58; // :

// Lookup table indexed by ASCII code: 1 marks a character that terminates a
// tag or attribute name (tab, LF, CR, space, "/", "=", ">"); all others are 0.
const NAME_END = new Uint8Array(128);
for (const code of [9, 10, 13, 32, SLASH, 61, GT]) {
  NAME_END[code] = 1;
}
|
|
59
|
+
/**
 * Parse XML/HTML into a lightweight node tree with minimal validation.
 * Outside strict mode, truncated input (unclosed tags, comments, CDATA,
 * declarations, attribute values) is recovered from on a best-effort basis.
 *
 * Element nodes have the shape `{ tagName, attributes, children }`, where
 * `attributes` is `null` or a prototype-less object (value `null` for
 * attributes without a value) and `children` mixes nodes and strings.
 *
 * Options read by this function (all optional):
 * - `pos`: index in `S` at which parsing starts (default 0)
 * - `keepComments`: keep `<!-- ... -->` comments as string children
 * - `trimWhitespace`: trim each text node and drop those that become empty
 * - `strict`: throw on unclosed tags, comments, CDATA and declarations
 * - `html`: default `selfClosingTags` / `rawContentTags` to the HTML lists
 *   and decode entities with the HTML decoder instead of the XML one
 * - `entities`: when exactly `true`, decode entities in text and attribute
 *   values (CDATA and raw-content text are not decoded)
 * - `selfClosingTags`: tag names that never have children
 * - `rawContentTags`: tag names whose content is taken verbatim up to the
 *   matching close tag
 * - `attrValue` (+ `attrName`, default `"id"`): return only the elements
 *   carrying that attribute/value pair
 * - `parseNode`: parse a single element starting at `pos` and return it
 * - `filter`: predicate applied through `filter()` when the result is an array
 * - `setPos`: when the result is a single node, store the final position on
 *   it as `pos`
 *
 * @param S - The XML string to parse
 * @param options - Parsing options (never modified)
 * @returns Array of parsed nodes and text content, or a single node when
 *   `options.parseNode` is set
 * @throws {Error} On a close tag that does not match the open element (in
 *   every mode) and, in strict mode, on unclosed constructs. Messages end
 *   with `at line L, column C`.
 */
function parse(S, options) {
  const resolvedOptions = options || {};
  let pos = resolvedOptions.pos || 0;
  const keepComments = !!resolvedOptions.keepComments;
  const trimWhitespace = !!resolvedOptions.trimWhitespace;
  const strict = !!resolvedOptions.strict;
  const htmlMode = !!resolvedOptions.html;
  const decode = resolvedOptions.entities === true ? (htmlMode ? decodeHTML : decodeXML) : null;
  const selfClosingTagList = resolvedOptions.selfClosingTags ?? (htmlMode ? HTML_VOID_ELEMENTS : []);
  const rawContentTagList = resolvedOptions.rawContentTags ?? (htmlMode ? HTML_RAW_CONTENT_TAGS : []);
  // `null` instead of an empty Set lets the hot path skip the lookup entirely.
  const selfClosingSet = selfClosingTagList.length > 0 ? new Set(selfClosingTagList) : null;
  const rawContentSet = rawContentTagList.length > 0 ? new Set(rawContentTagList) : null;

  /** Build an error with line/column info; `atPos` defaults to the current position. */
  function strictError(message, atPos) {
    const p = atPos !== void 0 ? atPos : pos;
    const lines = S.substring(0, p).split("\n");
    const line = lines.length;
    const column = lines[lines.length - 1].length + 1;
    return new Error(`${message} at line ${line}, column ${column}`);
  }

  /**
   * Remove whitespace-only text nodes from `children` (in place) when the
   * array holds at least one element and no other kind of text. Arrays with
   * any non-whitespace text (mixed content) are left untouched so their
   * formatting is preserved.
   */
  function stripIgnorableWhitespace(children) {
    let hasElement = false;
    let hasWhitespaceOnlyText = false;
    for (let i = 0; i < children.length; i++) {
      const child = children[i];
      if (typeof child !== "string") hasElement = true;
      else if (child.trim().length === 0) hasWhitespaceOnlyText = true;
      else return; // mixed content: keep everything
    }
    if (!hasElement || !hasWhitespaceOnlyText) return;
    let writeIndex = 0;
    for (let i = 0; i < children.length; i++) {
      const child = children[i];
      if (typeof child === "string" && child.trim().length === 0) continue;
      children[writeIndex++] = child;
    }
    children.length = writeIndex;
  }

  /** True for characters that may start an attribute name: letters, "_", ":" and non-ASCII. */
  function isNameStart(code) {
    return (code > 64 && code < 91) || (code > 96 && code < 123) || code === UNDERSCORE || code === COLON || code > 127;
  }

  /** Read text up to the next "<"; leaves `pos` on the last text character. */
  function parseText() {
    const start = pos;
    pos = S.indexOf("<", pos) - 1;
    if (pos === -2) pos = S.length; // no further "<": the text runs to the end
    return S.substring(start, pos + 1);
  }

  /** Read a tag/attribute name; leaves `pos` on the terminating character. */
  function parseName() {
    const start = pos;
    let charCode = S.charCodeAt(pos);
    // ASCII stops at a NAME_END character, non-ASCII always continues, and
    // NaN (past the end of the input) stops the scan.
    while (charCode < 128 ? NAME_END[charCode] === 0 : !Number.isNaN(charCode)) {
      charCode = S.charCodeAt(++pos);
    }
    return S.substring(start, pos);
  }

  /**
   * Read a quoted string starting on its opening quote. Leaves `pos` on the
   * closing quote, or sets it to -1 when the closing quote is missing.
   */
  function parseString() {
    const quoteCharCode = S.charCodeAt(pos);
    const startPosition = pos + 1;
    pos = S.indexOf(quoteCharCode === SQUOTE ? "'" : "\"", startPosition);
    return S.substring(startPosition, pos);
  }

  /**
   * Read the attributes of an open tag, starting right after the tag name.
   * Normally leaves `pos` on the tag's ">" (or past the end of the input).
   * When an attribute value is missing its closing quote, returns the
   * attributes collected so far with `pos === -1`; callers must check that.
   */
  function parseAttributes() {
    let attributes = null;
    while (S.charCodeAt(pos) !== GT && S[pos]) {
      if (isNameStart(S.charCodeAt(pos))) {
        const name = parseName();
        // Skip "=", whitespace, etc. up to the value's quote, the next
        // attribute name, or the end of the tag.
        let code = S.charCodeAt(pos);
        while (code && code !== SQUOTE && code !== DQUOTE && !isNameStart(code) && code !== GT) {
          pos++;
          code = S.charCodeAt(pos);
        }
        let value;
        if (code === SQUOTE || code === DQUOTE) {
          value = parseString();
          if (pos === -1) return attributes; // unterminated value
          if (decode) value = decode(value);
        } else {
          value = null;
          pos--; // re-read this character on the next iteration
        }
        if (attributes === null) attributes = Object.create(null);
        attributes[name] = value;
      }
      pos++;
    }
    return attributes;
  }

  /**
   * Read the verbatim content of a raw-content element. Expects `pos` on the
   * open tag's ">"; leaves it just past the close tag (or at the end of the
   * input when the close tag is missing). Returns the children array.
   */
  function parseRawContent(tagName) {
    const closeTagStr = "</" + tagName + ">";
    const start = pos + 1;
    const end = S.indexOf(closeTagStr, start);
    if (end === -1) {
      // `pos` is still on the open tag's ">", so the error points at the tag.
      if (strict) throw strictError(`Unclosed tag <${tagName}>`);
      pos = S.length;
      return [S.substring(start)];
    }
    pos = end + closeTagStr.length;
    return [S.substring(start, end)];
  }

  /**
   * Read a `<!KEYWORD ...>` declaration (e.g. DOCTYPE). Expects `pos` on the
   * "<"; leaves it on the closing ">" (or at the end of the input). Each
   * bare or quoted token becomes an attribute key with value `null`; a
   * bracketed internal subset `[...]` is skipped.
   */
  function parseDeclaration() {
    pos += 2;
    const keywordStart = pos - 1; // keep the "!" in the tag name
    while (pos < S.length) {
      const cc = S.charCodeAt(pos);
      if (cc <= 32 || cc === GT || cc === LBRACKET) break;
      pos++;
    }
    const declTagName = S.substring(keywordStart, pos);
    let declAttributes = null;
    while (pos < S.length) {
      const cc = S.charCodeAt(pos);
      if (cc === GT || cc === LBRACKET) break;
      if (cc <= 32) {
        pos++;
        continue;
      }
      let token;
      if (cc === SQUOTE || cc === DQUOTE) {
        const closePos = S.indexOf(cc === SQUOTE ? "'" : "\"", pos + 1);
        if (closePos === -1) {
          if (strict) throw strictError("Unclosed declaration");
          pos = S.length;
          break;
        }
        token = S.substring(pos + 1, closePos);
        pos = closePos + 1;
      } else {
        const tokenStart = pos;
        while (pos < S.length) {
          const tc = S.charCodeAt(pos);
          if (tc <= 32 || tc === GT || tc === LBRACKET) break;
          pos++;
        }
        token = S.substring(tokenStart, pos);
      }
      if (declAttributes === null) declAttributes = Object.create(null);
      declAttributes[token] = null;
    }
    if (pos < S.length && S.charCodeAt(pos) === LBRACKET) {
      // Skip the internal subset, ignoring "]" inside quoted strings.
      pos++;
      let insideBracketSection = true;
      while (insideBracketSection && pos < S.length) {
        const code = S.charCodeAt(pos);
        if (code === RBRACKET) {
          insideBracketSection = false;
        } else if (code === SQUOTE || code === DQUOTE) {
          pos = S.indexOf(code === SQUOTE ? "'" : "\"", pos + 1);
          if (pos === -1) {
            pos = S.length;
            break;
          }
        }
        pos++;
      }
      while (pos < S.length && S.charCodeAt(pos) <= 32) pos++;
    }
    if (strict && (pos >= S.length || S.charCodeAt(pos) !== GT)) throw strictError("Unclosed declaration");
    return {
      tagName: declTagName,
      attributes: declAttributes,
      children: [],
    };
  }

  /**
   * Parse sibling nodes starting at `pos` until the close tag of
   * `rootTagName` (or the end of the input). Nested elements are tracked on
   * an explicit stack rather than by recursion.
   */
  function parseChildren(rootTagName) {
    // Each frame holds the parent's tag name and children, plus the
    // attributes of the element that was opened.
    const stack = [];
    let currentTagName = rootTagName;
    let children = [];

    /** Finish the innermost open element and attach it to its parent. */
    function closeCurrentElement() {
      const frame = stack.pop();
      const node = {
        tagName: currentTagName,
        attributes: frame.attributes,
        children,
      };
      currentTagName = frame.tagName;
      children = frame.children;
      children.push(node);
      return node;
    }

    /** Close every open element (input ended early) and return the root children. */
    function unwind(stripWhitespace) {
      if (stripWhitespace) stripIgnorableWhitespace(children);
      while (stack.length > 0) {
        closeCurrentElement();
        if (stripWhitespace) stripIgnorableWhitespace(children);
      }
      return children;
    }

    while (S[pos]) {
      if (S.charCodeAt(pos) !== LT) {
        // ---- text ----
        let text = parseText();
        if (decode) text = decode(text);
        if (trimWhitespace) {
          const trimmed = text.trim();
          if (trimmed.length > 0) children.push(trimmed);
        } else if (text.length > 0) {
          children.push(text);
        }
        pos++;
        continue;
      }

      const nextCode = S.charCodeAt(pos + 1);

      if (nextCode === SLASH) {
        // ---- close tag ----
        const closeStart = pos + 2;
        pos = S.indexOf(">", pos);
        if (pos === -1) {
          if (strict) throw strictError("Unclosed close tag", closeStart - 2);
          pos = S.length;
          return unwind(true);
        }
        const closeTag = S.substring(closeStart, pos).trimEnd();
        // A mismatched close tag is fatal in every mode, not only in strict mode.
        if (closeTag !== currentTagName) {
          throw strictError(`Unexpected close tag </${closeTag}> (expected </${currentTagName}>)`);
        }
        pos += 1;
        stripIgnorableWhitespace(children);
        if (stack.length === 0) return children;
        const node = closeCurrentElement();
        if (node.tagName.charCodeAt(0) === QUESTION) {
          // Processing instructions do not own content: hoist whatever was
          // collected under them into the parent.
          children.push(...node.children);
          node.children = [];
        }
        continue;
      }

      if (nextCode === BANG) {
        if (S.charCodeAt(pos + 2) === DASH) {
          // ---- comment ----
          const startCommentPos = pos;
          pos = S.indexOf("-->", pos + 3);
          if (pos === -1) {
            if (strict) throw strictError("Unclosed comment", startCommentPos);
            pos = S.length;
            if (keepComments) children.push(S.substring(startCommentPos));
          } else {
            pos += 2; // onto the closing ">"
            if (keepComments) children.push(S.substring(startCommentPos, pos + 1));
          }
        } else if (
          S.charCodeAt(pos + 2) === LBRACKET &&
          S.charCodeAt(pos + 8) === LBRACKET &&
          S.substring(pos + 3, pos + 8).toLowerCase() === "cdata"
        ) {
          // ---- CDATA section: content is pushed verbatim ----
          const cdataEndIndex = S.indexOf("]]>", pos);
          if (cdataEndIndex === -1) {
            if (strict) throw strictError("Unclosed CDATA section");
            children.push(S.substring(pos + 9));
            pos = S.length;
          } else {
            children.push(S.substring(pos + 9, cdataEndIndex));
            pos = cdataEndIndex + 3;
          }
          continue;
        } else {
          // ---- declaration (<!DOCTYPE ...> and the like) ----
          children.push(parseDeclaration());
        }
        pos++; // step past the closing ">"
        continue;
      }

      // ---- open tag ----
      pos++;
      const tagName = parseName();
      const attributes = parseAttributes();
      if (pos === -1) {
        // Unterminated attribute value: keep what was parsed and stop.
        // NOTE(review): this path does not throw in strict mode — confirm
        // whether that is intended.
        children.push({
          tagName,
          attributes,
          children: [],
        });
        return unwind(false);
      }
      if (strict && !S[pos]) throw strictError(`Unclosed tag <${tagName}>`);

      const previousCode = S.charCodeAt(pos - 1);
      // `<x/>`, `<?x ... ?>` and `<!x ...>` never open a content scope.
      const opensContent = previousCode !== SLASH && previousCode !== QUESTION && tagName.charCodeAt(0) !== BANG;

      if (opensContent && rawContentSet !== null && rawContentSet.has(tagName)) {
        const node = {
          tagName,
          attributes,
          children: parseRawContent(tagName),
        };
        children.push(node);
        if (tagName.charCodeAt(0) === QUESTION) {
          children.push(...node.children);
          node.children = [];
        }
      } else if (opensContent && (selfClosingSet === null || !selfClosingSet.has(tagName))) {
        pos++;
        stack.push({
          tagName: currentTagName,
          attributes,
          children,
        });
        currentTagName = tagName;
        children = [];
      } else {
        pos++;
        children.push({
          tagName,
          attributes,
          children: [],
        });
      }
    }

    if (strict && currentTagName !== "") throw strictError(`Unclosed tag <${currentTagName}>`);
    return unwind(true);
  }

  /** Parse the single element whose "<" is at `pos`, including its content. */
  function parseNode() {
    pos++;
    const tagName = parseName();
    let children = [];
    const attributes = parseAttributes();
    if (pos === -1) {
      // Unterminated attribute value: return what was parsed so far.
      return {
        tagName,
        attributes,
        children,
      };
    }
    if (strict && !S[pos]) throw strictError(`Unclosed tag <${tagName}>`);

    const previousCode = S.charCodeAt(pos - 1);
    const opensContent = previousCode !== SLASH && previousCode !== QUESTION && tagName.charCodeAt(0) !== BANG;

    if (opensContent && rawContentSet !== null && rawContentSet.has(tagName)) {
      children = parseRawContent(tagName);
    } else if (opensContent && (selfClosingSet === null || !selfClosingSet.has(tagName))) {
      pos++;
      children = parseChildren(tagName);
    } else {
      pos++;
    }
    return {
      tagName,
      attributes,
      children,
    };
  }

  let out;
  if (resolvedOptions.attrValue !== void 0) {
    // Attribute lookup mode. The defaulted name lives in a local so the
    // caller's options object is never written to.
    const attrName = resolvedOptions.attrName || "id";
    const attrValue = resolvedOptions.attrValue;
    // The pattern does not change between iterations, so build it once. The
    // regex has no `g` flag, so `exec` always searches from the start.
    const matcher = attrValue
      ? new RegExp("\\s" + escapeRegExp(attrName) + "\\s*=['\"]" + escapeRegExp(attrValue) + "['\"]")
      : null;
    const findElements = () => {
      const matchResult = matcher === null ? null : matcher.exec(S);
      return matchResult ? matchResult.index : -1;
    };
    const results = [];
    while ((pos = findElements()) !== -1) {
      // Back up to the "<" of the tag that carries the attribute.
      pos = S.lastIndexOf("<", pos);
      if (pos !== -1) results.push(parseNode());
      // Continue searching in the remainder of the input.
      S = S.slice(pos);
      pos = 0;
    }
    out = results;
  } else if (resolvedOptions.parseNode) {
    out = parseNode();
  } else {
    out = parseChildren("");
  }
  if (resolvedOptions.filter && Array.isArray(out)) out = filter(out, resolvedOptions.filter);
  if (resolvedOptions.setPos && typeof out === "object" && !Array.isArray(out)) out.pos = pos;
  return out;
}
|
|
476
|
+
//#endregion
|
|
477
|
+
export { filter as n, escapeRegExp as r, parse as t };
|
|
478
|
+
|
|
479
|
+
//# sourceMappingURL=parser-CYq309aR.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parser-CYq309aR.mjs","names":[],"sources":["../src/utilities/escapeRegExp.ts","../src/utilities/filter.ts","../src/parser.ts"],"sourcesContent":["// RegExp.escape is not yet in the ES2023 lib typings\ndeclare global {\n interface RegExpConstructor {\n escape?: (s: string) => string;\n }\n}\n\n/**\n * Escapes special regex characters in a string so it can be safely\n * interpolated into a `new RegExp(...)` pattern.\n *\n * Uses the native `RegExp.escape` when available (Node ≥ 24, Chrome ≥ 136,\n * Firefox ≥ 134, Safari ≥ 18.2), otherwise falls back to a manual replacement.\n * @internal\n */\nexport const escapeRegExp: (s: string) => string =\n typeof RegExp.escape === 'function'\n ? RegExp.escape\n : (s) => s.replace(/[.*+?^${}()|[\\]\\\\]/g, '\\\\$&');\n","import type { TNode } from '#src/parser.ts';\nimport { parse } from '#src/parser.ts';\n\n/**\n * Filter nodes like Array.filter - returns nodes where the filter function returns true\n * @param input - XML string or array of nodes to filter\n * @param predicate - Filter function\n * @param depth - Current depth in the tree (internal use)\n * @param path - Current path in the tree (internal use)\n * @returns Filtered array of nodes\n */\nexport function filter(\n input: string | (TNode | string)[],\n predicate: (\n node: TNode,\n index: number,\n depth: number,\n path: string,\n ) => boolean,\n depth: number = 0,\n path: string = '',\n): TNode[] {\n const out: TNode[] = [];\n filterInto(\n out,\n typeof input === 'string' ? 
parse(input) : input,\n predicate,\n depth,\n path,\n );\n return out;\n}\n\nfunction filterInto(\n out: TNode[],\n children: (TNode | string)[],\n predicate: (\n node: TNode,\n index: number,\n depth: number,\n path: string,\n ) => boolean,\n depth: number,\n path: string,\n): void {\n for (let i = 0; i < children.length; i++) {\n const child = children[i]!;\n if (typeof child === 'object') {\n if (predicate(child, i, depth, path)) {\n out.push(child);\n }\n if (child.children) {\n filterInto(\n out,\n child.children,\n predicate,\n depth + 1,\n (path ? path + '.' : '') + i + '.' + child.tagName,\n );\n }\n }\n }\n}\n","import { decodeXML, decodeHTML } from 'entities';\nimport { escapeRegExp } from '#src/utilities/escapeRegExp.ts';\nimport { filter } from '#src/utilities/filter.ts';\nimport {\n HTML_VOID_ELEMENTS,\n HTML_RAW_CONTENT_TAGS,\n} from '#src/utilities/htmlConstants.ts';\n// @generated:char-codes:begin\nconst LT = 60; // <\nconst GT = 62; // >\nconst SLASH = 47; // /\nconst BANG = 33; // !\nconst QUESTION = 63; // ?\nconst LBRACKET = 91; // [\nconst RBRACKET = 93; // ]\nconst SQUOTE = 39; // '\nconst DQUOTE = 34; // \"\nconst DASH = 45; // -\nconst UNDERSCORE = 95; // _\nconst COLON = 58; // :\n// @generated:char-codes:end\n\n/**\n * A parsed XML node\n */\nexport interface TNode {\n tagName: string;\n /**\n * Element attributes, or `null` if the element has no attributes.\n * Values can be:\n * - string: attribute with a value (e.g., `<div id=\"test\">` -> `{id: \"test\"}`)\n * - null: attribute without a value (e.g., `<input disabled>` -> `{disabled: null}`)\n * - empty string: attribute with empty value (e.g., `<input value=\"\">` -> `{value: \"\"}`)\n */\n attributes: Record<string, string | null> | null;\n children: (TNode | string)[];\n}\n\n/**\n * TNode with a pos property, returned when setPos option is true\n */\ninterface TNodeWithPos extends TNode {\n pos: number;\n}\n\n/**\n * Options for parsing XML\n */\nexport interface ParseOptions {\n 
/** Starting position in the string */\n pos?: number;\n /**\n * Array of tag names that are self-closing (void elements) and don't need closing tags.\n * In XML mode (default), this defaults to `[]` — self-closing is detected only by `/>` syntax.\n * In HTML mode (`html: true`), this defaults to the standard HTML void elements.\n * Can be overridden explicitly regardless of mode.\n */\n selfClosingTags?: string[];\n /**\n * Array of tag names whose content should be treated as raw text, not parsed as XML/HTML.\n * The parser will scan for the matching `</tagName>` close tag and emit everything between\n * as a single text child node.\n *\n * In XML mode (default), this defaults to `[]`.\n * In HTML mode (`html: true`), this defaults to `[\"script\", \"style\"]`.\n * Can be overridden explicitly regardless of mode.\n */\n rawContentTags?: string[];\n /**\n * Enable HTML parsing mode. When `true`, sets sensible defaults for:\n * - `selfClosingTags`: standard HTML void elements (area, base, br, col, embed, hr, img, input, link, meta, param, source, track, wbr)\n * - `rawContentTags`: elements whose content is raw text (script, style)\n *\n * These defaults can be overridden by explicitly passing `selfClosingTags` or `rawContentTags`.\n */\n html?: boolean;\n /** Keep XML comments in the output */\n keepComments?: boolean;\n /** Trim whitespace from text nodes and discard whitespace-only text nodes */\n trimWhitespace?: boolean;\n /**\n * Strict mode: throw on malformed XML instead of recovering silently.\n * Catches unclosed comments, CDATA sections, processing instructions,\n * close tags, and open tags that reach end-of-input without closing.\n */\n strict?: boolean;\n /**\n * Decode XML/HTML entities in text content and attribute values.\n * When enabled, named entities (`&`, `<`, etc.), decimal character\n * references (`ä`), and hex character references (`ä`) are decoded.\n *\n * In HTML mode (`html: true`), the full set of HTML named entities is\n * supported 
(e.g. ` `, `©`, `—`). In XML mode, only the\n * five standard XML entities plus numeric references are decoded.\n *\n * CDATA sections are never decoded regardless of this setting.\n *\n * Defaults to `false` — entities are preserved as-is in the output.\n */\n entities?: boolean;\n /** Attribute name to search for (used with attrValue) */\n attrName?: string;\n /** Attribute value to search for (regex pattern) */\n attrValue?: string;\n /** Filter function to apply to nodes */\n filter?: (node: TNode, index: number, depth: number, path: string) => boolean;\n}\n\n/** Internal options extending ParseOptions — not part of the public API. */\ninterface InternalParseOptions extends ParseOptions {\n /** If true, the returned object will have a pos property indicating where parsing stopped */\n setPos?: boolean;\n /** Parse a single node instead of a list of nodes */\n parseNode?: boolean;\n}\n\n// Pre-computed lookup table: 1 for characters that terminate a name token\n// Name-ending chars: \\t(9) \\n(10) \\r(13) space(32) /(47) =(61) >(62)\nconst NAME_END = new Uint8Array(128);\nNAME_END[9] = 1; // \\t\nNAME_END[10] = 1; // \\n\nNAME_END[13] = 1; // \\r\nNAME_END[32] = 1; // space\nNAME_END[47] = 1; // /\nNAME_END[61] = 1; // =\nNAME_END[62] = 1; // >\n\n/**\n * Parse XML/HTML into a DOM Object with minimal validation and fault tolerance\n * @param S - The XML string to parse\n * @param options - Parsing options\n * @returns Array of parsed nodes and text content\n */\nexport function parse(\n S: string,\n options?: ParseOptions | InternalParseOptions,\n): (TNode | string)[] {\n const resolvedOptions = (options || {}) as InternalParseOptions;\n\n let pos = resolvedOptions.pos || 0;\n const keepComments = !!resolvedOptions.keepComments;\n const trimWhitespace = !!resolvedOptions.trimWhitespace;\n const strict = !!resolvedOptions.strict;\n const htmlMode = !!resolvedOptions.html;\n const decode =\n resolvedOptions.entities === true\n ? htmlMode\n ? 
decodeHTML\n : decodeXML\n : null;\n\n const selfClosingTagList: string[] =\n resolvedOptions.selfClosingTags ?? (htmlMode ? HTML_VOID_ELEMENTS : []);\n const rawContentTagList: string[] =\n resolvedOptions.rawContentTags ?? (htmlMode ? HTML_RAW_CONTENT_TAGS : []);\n\n // Convert to Sets for O(1) lookup when non-empty\n const selfClosingSet: Set<string> | null =\n selfClosingTagList.length > 0 ? new Set(selfClosingTagList) : null;\n const rawContentSet: Set<string> | null =\n rawContentTagList.length > 0 ? new Set(rawContentTagList) : null;\n\n /** Build an error with line/column info for strict mode. */\n function strictError(message: string, atPos?: number): Error {\n const p = atPos !== undefined ? atPos : pos;\n const before = S.substring(0, p);\n const lines = before.split('\\n');\n const line = lines.length;\n const column = lines[lines.length - 1]!.length + 1;\n return new Error(`${message} at line ${line}, column ${column}`);\n }\n\n /**\n * Strip whitespace-only text nodes from a children array when it\n * contains only element nodes and whitespace-only text (i.e. \"ignorable\n * whitespace\" per XML spec). Mixed-content elements — those with at\n * least one non-whitespace text child — are left untouched so that\n * whitespace formatting is preserved.\n * Mutates the array in-place for performance.\n */\n function stripIgnorableWhitespace(children: (TNode | string)[]): void {\n let hasElement = false;\n let hasWhitespaceOnlyText = false;\n let hasNonWhitespaceText = false;\n for (let i = 0; i < children.length; i++) {\n const child = children[i]!;\n if (typeof child !== 'string') {\n hasElement = true;\n } else if (child.trim().length === 0) {\n hasWhitespaceOnlyText = true;\n } else {\n hasNonWhitespaceText = true;\n }\n }\n // Only strip when children are exclusively elements + whitespace-only\n // text (pure element containers). 
Mixed content is left intact.\n if (hasElement && hasWhitespaceOnlyText && !hasNonWhitespaceText) {\n // Compact in-place with a write pointer (avoids O(n) splice per removal)\n let writeIndex = 0;\n for (let i = 0; i < children.length; i++) {\n const child = children[i]!;\n if (typeof child === 'string' && child.trim().length === 0) continue;\n children[writeIndex++] = child;\n }\n children.length = writeIndex;\n }\n }\n\n function parseChildren(rootTagName: string): (TNode | string)[] {\n // Iterative tree-building using an explicit stack to avoid\n // stack overflow on deeply nested XML (the old recursive\n // parseNode → parseChildren → parseNode chain blew up at ~2000 levels).\n interface Frame {\n tagName: string;\n attributes: Record<string, string | null> | null;\n children: (TNode | string)[];\n }\n const stack: Frame[] = [];\n let currentTagName = rootTagName;\n let children: (TNode | string)[] = [];\n\n while (S[pos]) {\n if (S.charCodeAt(pos) === LT) {\n if (S.charCodeAt(pos + 1) === SLASH) {\n // ---- Close tag ----\n const closeStart = pos + 2;\n pos = S.indexOf('>', pos);\n\n if (pos === -1) {\n if (strict) throw strictError('Unclosed close tag', closeStart - 2);\n pos = S.length;\n stripIgnorableWhitespace(children);\n // Unwind: if we are inside a stacked frame, pop back\n while (stack.length > 0) {\n const frame = stack.pop()!;\n const node: TNode = {\n tagName: currentTagName,\n attributes: frame.attributes,\n children,\n };\n // Restore from stack\n currentTagName = frame.tagName;\n children = frame.children;\n children.push(node);\n stripIgnorableWhitespace(children);\n }\n return children;\n }\n\n const closeTag = S.substring(closeStart, pos).trimEnd();\n if (closeTag !== currentTagName) {\n throw strictError(\n `Unexpected close tag </${closeTag}> (expected </${currentTagName}>)`,\n );\n }\n\n if (pos + 1) pos += 1;\n\n stripIgnorableWhitespace(children);\n\n if (stack.length === 0) {\n // We've closed the root tag — return\n return 
children;\n }\n\n // Pop frame: finalize node and add to parent's children\n const frame = stack.pop()!;\n const node: TNode = {\n tagName: currentTagName,\n attributes: frame.attributes,\n children,\n };\n currentTagName = frame.tagName;\n children = frame.children;\n children.push(node);\n // Handle processing instruction children promotion\n if (node.tagName.charCodeAt(0) === QUESTION) {\n children.push(...node.children);\n node.children = [];\n }\n continue;\n } else if (S.charCodeAt(pos + 1) === BANG) {\n if (S.charCodeAt(pos + 2) === DASH) {\n // comment: use indexOf(\"-->\") for fast scanning\n const startCommentPos = pos;\n pos = S.indexOf('-->', pos + 3);\n if (pos === -1) {\n if (strict)\n throw strictError('Unclosed comment', startCommentPos);\n pos = S.length;\n if (keepComments) {\n children.push(S.substring(startCommentPos));\n }\n } else {\n pos += 2; // point to the '>'\n if (keepComments) {\n children.push(S.substring(startCommentPos, pos + 1));\n }\n }\n } else if (\n S.charCodeAt(pos + 2) === LBRACKET &&\n S.charCodeAt(pos + 8) === LBRACKET &&\n S.substring(pos + 3, pos + 8).toLowerCase() === 'cdata'\n ) {\n // cdata\n const cdataEndIndex = S.indexOf(']]>', pos);\n if (cdataEndIndex === -1) {\n if (strict) throw strictError('Unclosed CDATA section');\n children.push(S.substring(pos + 9));\n pos = S.length;\n } else {\n children.push(S.substring(pos + 9, cdataEndIndex));\n pos = cdataEndIndex + 3;\n }\n continue;\n } else {\n // doctype / other <!...> declarations: parse as TNode\n // Read the declaration keyword (e.g. 
\"!DOCTYPE\")\n pos += 2; // skip '<!'\n const keywordStart = pos - 1; // include the '!'\n while (pos < S.length) {\n const cc = S.charCodeAt(pos);\n if (cc <= 32 || cc === GT || cc === LBRACKET) break;\n pos++;\n }\n const declTagName = S.substring(keywordStart, pos);\n\n // Parse space-separated tokens as null-valued attributes\n let declAttributes: Record<string, string | null> | null = null;\n while (pos < S.length) {\n const cc = S.charCodeAt(pos);\n if (cc === GT || cc === LBRACKET) break;\n // Skip whitespace\n if (cc <= 32) {\n pos++;\n continue;\n }\n // Quoted token — capture including quotes as the key\n if (cc === SQUOTE || cc === DQUOTE) {\n const closePos = S.indexOf(cc === SQUOTE ? \"'\" : '\"', pos + 1);\n if (closePos === -1) {\n if (strict) throw strictError('Unclosed declaration');\n pos = S.length;\n break;\n }\n const token = S.substring(pos + 1, closePos);\n if (declAttributes === null)\n declAttributes = Object.create(null);\n declAttributes![token] = null;\n pos = closePos + 1;\n continue;\n }\n // Unquoted token\n const tokenStart = pos;\n while (pos < S.length) {\n const tc = S.charCodeAt(pos);\n if (tc <= 32 || tc === GT || tc === LBRACKET) break;\n pos++;\n }\n const token = S.substring(tokenStart, pos);\n if (declAttributes === null) declAttributes = Object.create(null);\n declAttributes![token] = null;\n }\n\n // Skip internal DTD subset ([...]) if present\n if (pos < S.length && S.charCodeAt(pos) === LBRACKET) {\n pos++; // skip '['\n let insideBracketSection = true;\n while (insideBracketSection && pos < S.length) {\n if (S.charCodeAt(pos) === RBRACKET) {\n insideBracketSection = false;\n } else {\n // Skip quoted strings inside internal DTD subset\n const quoteCharCode = S.charCodeAt(pos);\n if (quoteCharCode === SQUOTE || quoteCharCode === DQUOTE) {\n pos = S.indexOf(\n quoteCharCode === SQUOTE ? 
\"'\" : '\"',\n pos + 1,\n );\n if (pos === -1) {\n pos = S.length;\n break;\n }\n }\n }\n pos++;\n }\n // Skip any whitespace between ] and >\n while (pos < S.length && S.charCodeAt(pos) <= 32) pos++;\n }\n\n if (strict && (pos >= S.length || S.charCodeAt(pos) !== GT))\n throw strictError('Unclosed declaration');\n\n children.push({\n tagName: declTagName,\n attributes: declAttributes,\n children: [],\n } as TNode);\n }\n pos++;\n continue;\n }\n // ---- Open tag (inline parseNode logic) ----\n pos++;\n const tagName = parseName();\n let attributes: Record<string, string | null> | null = null;\n\n // parsing attributes\n while (S.charCodeAt(pos) !== GT && S[pos]) {\n let charCode = S.charCodeAt(pos);\n if (\n (charCode > 64 && charCode < 91) ||\n (charCode > 96 && charCode < 123) ||\n charCode === UNDERSCORE ||\n charCode === COLON ||\n charCode > 127\n ) {\n const name = parseName();\n let code = S.charCodeAt(pos);\n while (\n code &&\n code !== SQUOTE &&\n code !== DQUOTE &&\n !(\n (code > 64 && code < 91) ||\n (code > 96 && code < 123) ||\n code === UNDERSCORE ||\n code === COLON ||\n code > 127\n ) &&\n code !== GT\n ) {\n pos++;\n code = S.charCodeAt(pos);\n }\n let value: string | null;\n if (code === SQUOTE || code === DQUOTE) {\n value = parseString();\n if (pos === -1) {\n // Unterminated attribute string — emit node with what we have\n const node: TNode = { tagName, attributes, children: [] };\n children.push(node);\n // Unwind remaining stack frames\n while (stack.length > 0) {\n const frame = stack.pop()!;\n const parent: TNode = {\n tagName: currentTagName,\n attributes: frame.attributes,\n children,\n };\n currentTagName = frame.tagName;\n children = frame.children;\n children.push(parent);\n }\n return children;\n }\n if (decode) value = decode(value);\n } else {\n value = null;\n pos--;\n }\n if (attributes === null) attributes = Object.create(null);\n attributes![name] = value;\n }\n pos++;\n }\n if (strict && !S[pos]) {\n throw 
strictError(`Unclosed tag <${tagName}>`);\n }\n\n // Determine if this node has children or is self-closing\n if (\n S.charCodeAt(pos - 1) !== SLASH &&\n S.charCodeAt(pos - 1) !== QUESTION &&\n tagName.charCodeAt(0) !== BANG\n ) {\n if (rawContentSet !== null && rawContentSet.has(tagName)) {\n // Raw content tag\n const closeTagStr = '</' + tagName + '>';\n const start = pos + 1;\n pos = S.indexOf(closeTagStr, start);\n let rawChildren: (TNode | string)[];\n if (pos === -1) {\n if (strict) throw strictError(`Unclosed tag <${tagName}>`);\n rawChildren = [S.substring(start)];\n pos = S.length;\n } else {\n rawChildren = [S.substring(start, pos)];\n pos += closeTagStr.length;\n }\n const node: TNode = { tagName, attributes, children: rawChildren };\n children.push(node);\n if (tagName.charCodeAt(0) === QUESTION) {\n children.push(...node.children);\n node.children = [];\n }\n } else if (selfClosingSet === null || !selfClosingSet.has(tagName)) {\n // Node has children — push frame and descend\n pos++;\n stack.push({ tagName: currentTagName, attributes, children });\n currentTagName = tagName;\n children = [];\n } else {\n // Self-closing tag (from selfClosingTags list)\n pos++;\n const node: TNode = { tagName, attributes, children: [] };\n children.push(node);\n if (tagName.charCodeAt(0) === QUESTION) {\n children.push(...node.children);\n node.children = [];\n }\n }\n } else {\n // Explicit self-closing (/>) or processing instruction (?>) or declaration\n pos++;\n const node: TNode = { tagName, attributes, children: [] };\n children.push(node);\n if (tagName.charCodeAt(0) === QUESTION) {\n children.push(...node.children);\n node.children = [];\n }\n }\n } else {\n let text = parseText();\n if (decode) text = decode(text);\n if (trimWhitespace) {\n const trimmed = text.trim();\n if (trimmed.length > 0) {\n children.push(trimmed);\n }\n } else {\n if (text.length > 0) {\n children.push(text);\n }\n }\n pos++;\n }\n }\n // If we exit the loop for a named tag, input ended 
without a close tag\n if (strict && currentTagName !== '') {\n throw strictError(`Unclosed tag <${currentTagName}>`);\n }\n stripIgnorableWhitespace(children);\n // Unwind any remaining stack frames (unclosed tags in non-strict mode)\n while (stack.length > 0) {\n const frame = stack.pop()!;\n const node: TNode = {\n tagName: currentTagName,\n attributes: frame.attributes,\n children,\n };\n currentTagName = frame.tagName;\n children = frame.children;\n children.push(node);\n stripIgnorableWhitespace(children);\n }\n return children;\n }\n\n function parseText(): string {\n const start = pos;\n pos = S.indexOf('<', pos) - 1;\n if (pos === -2) pos = S.length;\n return S.substring(start, pos + 1);\n }\n\n function parseName(): string {\n const start = pos;\n let charCode = S.charCodeAt(pos);\n while (\n charCode < 128\n ? NAME_END[charCode] === 0\n : charCode === charCode /* not NaN = not past end */\n ) {\n charCode = S.charCodeAt(++pos);\n }\n return S.substring(start, pos);\n }\n\n function parseNode(): TNode {\n pos++;\n const tagName = parseName();\n // Defer attributes allocation until first attribute is found\n let attributes: Record<string, string | null> | null = null;\n let children: (TNode | string)[] = [];\n\n // parsing attributes\n while (S.charCodeAt(pos) !== GT && S[pos]) {\n let charCode = S.charCodeAt(pos);\n // Valid XML attribute name start: A-Z, a-z, _, :, or non-ASCII\n if (\n (charCode > 64 && charCode < 91) ||\n (charCode > 96 && charCode < 123) ||\n charCode === UNDERSCORE ||\n charCode === COLON ||\n charCode > 127\n ) {\n const name = parseName();\n // search beginning of the string\n let code = S.charCodeAt(pos);\n while (\n code &&\n code !== SQUOTE &&\n code !== DQUOTE &&\n !(\n (code > 64 && code < 91) ||\n (code > 96 && code < 123) ||\n code === UNDERSCORE ||\n code === COLON ||\n code > 127\n ) &&\n code !== GT\n ) {\n pos++;\n code = S.charCodeAt(pos);\n }\n let value: string | null;\n if (code === SQUOTE || code === DQUOTE) {\n 
value = parseString();\n if (pos === -1) {\n return { tagName, attributes, children };\n }\n if (decode) value = decode(value);\n } else {\n value = null;\n pos--;\n }\n // Allocate attributes object lazily on first attribute\n if (attributes === null) attributes = Object.create(null);\n attributes![name] = value;\n }\n pos++;\n }\n if (strict && !S[pos]) {\n throw strictError(`Unclosed tag <${tagName}>`);\n }\n // optional parsing of children\n // Self-closing: explicit />, processing instruction ?>, or declaration <!...>\n if (\n S.charCodeAt(pos - 1) !== SLASH &&\n S.charCodeAt(pos - 1) !== QUESTION &&\n tagName.charCodeAt(0) !== BANG\n ) {\n if (rawContentSet !== null && rawContentSet.has(tagName)) {\n // Raw content tag: scan for the matching close tag and emit content as raw text\n const closeTag = '</' + tagName + '>';\n const start = pos + 1;\n pos = S.indexOf(closeTag, start);\n if (pos === -1) {\n if (strict) throw strictError(`Unclosed tag <${tagName}>`);\n // Unclosed raw content tag: consume the rest of the string\n children = [S.substring(start)];\n pos = S.length;\n } else {\n children = [S.substring(start, pos)];\n pos += closeTag.length;\n }\n } else if (selfClosingSet === null || !selfClosingSet.has(tagName)) {\n pos++;\n children = parseChildren(tagName);\n } else {\n pos++;\n }\n } else {\n pos++;\n }\n return { tagName, attributes, children };\n }\n\n function parseString(): string {\n const quoteCharCode = S.charCodeAt(pos);\n const startPosition = pos + 1;\n pos = S.indexOf(quoteCharCode === SQUOTE ? 
\"'\" : '\"', startPosition);\n return S.substring(startPosition, pos);\n }\n\n function findElements(): number {\n if (!resolvedOptions.attrName || !resolvedOptions.attrValue) return -1;\n const matchResult = new RegExp(\n '\\\\s' +\n escapeRegExp(resolvedOptions.attrName) +\n '\\\\s*=[\\'\"]' +\n escapeRegExp(resolvedOptions.attrValue) +\n '[\\'\"]',\n ).exec(S);\n if (matchResult) {\n return matchResult.index;\n } else {\n return -1;\n }\n }\n\n let out: (TNode | string)[] | TNode;\n\n if (resolvedOptions.attrValue !== undefined) {\n resolvedOptions.attrName = resolvedOptions.attrName || 'id';\n const results: (TNode | string)[] = [];\n\n while ((pos = findElements()) !== -1) {\n pos = S.lastIndexOf('<', pos);\n if (pos !== -1) {\n results.push(parseNode());\n }\n S = S.slice(pos);\n pos = 0;\n }\n out = results;\n } else if (resolvedOptions.parseNode) {\n out = parseNode();\n } else {\n out = parseChildren('');\n }\n\n if (resolvedOptions.filter && Array.isArray(out)) {\n out = filter(out, resolvedOptions.filter);\n }\n\n if (\n resolvedOptions.setPos &&\n typeof out === 'object' &&\n !Array.isArray(out)\n ) {\n (out as TNodeWithPos).pos = pos;\n }\n\n return out as (TNode | 
string)[];\n}\n"],"mappings":";;;;;;;;;;;AAeA,MAAa,eACX,OAAO,OAAO,WAAW,aACrB,OAAO,UACN,MAAM,EAAE,QAAQ,uBAAuB,OAAO;;;;;;;;;;;ACPrD,SAAgB,OACd,OACA,WAMA,QAAgB,GAChB,OAAe,IACN;CACT,MAAM,MAAe,EAAE;AACvB,YACE,KACA,OAAO,UAAU,WAAW,MAAM,MAAM,GAAG,OAC3C,WACA,OACA,KACD;AACD,QAAO;;AAGT,SAAS,WACP,KACA,UACA,WAMA,OACA,MACM;AACN,MAAK,IAAI,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;EACxC,MAAM,QAAQ,SAAS;AACvB,MAAI,OAAO,UAAU,UAAU;AAC7B,OAAI,UAAU,OAAO,GAAG,OAAO,KAAK,CAClC,KAAI,KAAK,MAAM;AAEjB,OAAI,MAAM,SACR,YACE,KACA,MAAM,UACN,WACA,QAAQ,IACP,OAAO,OAAO,MAAM,MAAM,IAAI,MAAM,MAAM,QAC5C;;;;;;AClDT,MAAM,KAAK;AACX,MAAM,KAAK;AACX,MAAM,QAAQ;AACd,MAAM,OAAO;AACb,MAAM,WAAW;AACjB,MAAM,WAAW;AACjB,MAAM,WAAW;AACjB,MAAM,SAAS;AACf,MAAM,SAAS;AACf,MAAM,OAAO;AACb,MAAM,aAAa;AACnB,MAAM,QAAQ;AAmGd,MAAM,WAAW,IAAI,WAAW,IAAI;AACpC,SAAS,KAAK;AACd,SAAS,MAAM;AACf,SAAS,MAAM;AACf,SAAS,MAAM;AACf,SAAS,MAAM;AACf,SAAS,MAAM;AACf,SAAS,MAAM;;;;;;;AAQf,SAAgB,MACd,GACA,SACoB;CACpB,MAAM,kBAAmB,WAAW,EAAE;CAEtC,IAAI,MAAM,gBAAgB,OAAO;CACjC,MAAM,eAAe,CAAC,CAAC,gBAAgB;CACvC,MAAM,iBAAiB,CAAC,CAAC,gBAAgB;CACzC,MAAM,SAAS,CAAC,CAAC,gBAAgB;CACjC,MAAM,WAAW,CAAC,CAAC,gBAAgB;CACnC,MAAM,SACJ,gBAAgB,aAAa,OACzB,WACE,aACA,YACF;CAEN,MAAM,qBACJ,gBAAgB,oBAAoB,WAAW,qBAAqB,EAAE;CACxE,MAAM,oBACJ,gBAAgB,mBAAmB,WAAW,wBAAwB,EAAE;CAG1E,MAAM,iBACJ,mBAAmB,SAAS,IAAI,IAAI,IAAI,mBAAmB,GAAG;CAChE,MAAM,gBACJ,kBAAkB,SAAS,IAAI,IAAI,IAAI,kBAAkB,GAAG;;CAG9D,SAAS,YAAY,SAAiB,OAAuB;EAC3D,MAAM,IAAI,UAAU,KAAA,IAAY,QAAQ;EAExC,MAAM,QADS,EAAE,UAAU,GAAG,EAAE,CACX,MAAM,KAAK;EAChC,MAAM,OAAO,MAAM;EACnB,MAAM,SAAS,MAAM,MAAM,SAAS,GAAI,SAAS;AACjD,yBAAO,IAAI,MAAM,GAAG,QAAQ,WAAW,KAAK,WAAW,SAAS;;;;;;;;;;CAWlE,SAAS,yBAAyB,UAAoC;EACpE,IAAI,aAAa;EACjB,IAAI,wBAAwB;EAC5B,IAAI,uBAAuB;AAC3B,OAAK,IAAI,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;GACxC,MAAM,QAAQ,SAAS;AACvB,OAAI,OAAO,UAAU,SACnB,cAAa;YACJ,MAAM,MAAM,CAAC,WAAW,EACjC,yBAAwB;OAExB,wBAAuB;;AAK3B,MAAI,cAAc,yBAAyB,CAAC,sBAAsB;GAEhE,IAAI,aAAa;AACjB,QAAK,IAAI,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;IACxC,MAAM,QAAQ,SAAS;AACvB,QAAI,OAAO,UAAU,YAAY,MAAM,MAAM,CAAC,WAAW,EAAG;AA
C5D,aAAS,gBAAgB;;AAE3B,YAAS,SAAS;;;CAItB,SAAS,cAAc,aAAyC;EAS9D,MAAM,QAAiB,EAAE;EACzB,IAAI,iBAAiB;EACrB,IAAI,WAA+B,EAAE;AAErC,SAAO,EAAE,KACP,KAAI,EAAE,WAAW,IAAI,KAAK,IAAI;AAC5B,OAAI,EAAE,WAAW,MAAM,EAAE,KAAK,OAAO;IAEnC,MAAM,aAAa,MAAM;AACzB,UAAM,EAAE,QAAQ,KAAK,IAAI;AAEzB,QAAI,QAAQ,IAAI;AACd,SAAI,OAAQ,OAAM,YAAY,sBAAsB,aAAa,EAAE;AACnE,WAAM,EAAE;AACR,8BAAyB,SAAS;AAElC,YAAO,MAAM,SAAS,GAAG;MACvB,MAAM,QAAQ,MAAM,KAAK;MACzB,MAAM,OAAc;OAClB,SAAS;OACT,YAAY,MAAM;OAClB;OACD;AAED,uBAAiB,MAAM;AACvB,iBAAW,MAAM;AACjB,eAAS,KAAK,KAAK;AACnB,+BAAyB,SAAS;;AAEpC,YAAO;;IAGT,MAAM,WAAW,EAAE,UAAU,YAAY,IAAI,CAAC,SAAS;AACvD,QAAI,aAAa,eACf,OAAM,YACJ,0BAA0B,SAAS,gBAAgB,eAAe,IACnE;AAGH,QAAI,MAAM,EAAG,QAAO;AAEpB,6BAAyB,SAAS;AAElC,QAAI,MAAM,WAAW,EAEnB,QAAO;IAIT,MAAM,QAAQ,MAAM,KAAK;IACzB,MAAM,OAAc;KAClB,SAAS;KACT,YAAY,MAAM;KAClB;KACD;AACD,qBAAiB,MAAM;AACvB,eAAW,MAAM;AACjB,aAAS,KAAK,KAAK;AAEnB,QAAI,KAAK,QAAQ,WAAW,EAAE,KAAK,UAAU;AAC3C,cAAS,KAAK,GAAG,KAAK,SAAS;AAC/B,UAAK,WAAW,EAAE;;AAEpB;cACS,EAAE,WAAW,MAAM,EAAE,KAAK,MAAM;AACzC,QAAI,EAAE,WAAW,MAAM,EAAE,KAAK,MAAM;KAElC,MAAM,kBAAkB;AACxB,WAAM,EAAE,QAAQ,OAAO,MAAM,EAAE;AAC/B,SAAI,QAAQ,IAAI;AACd,UAAI,OACF,OAAM,YAAY,oBAAoB,gBAAgB;AACxD,YAAM,EAAE;AACR,UAAI,aACF,UAAS,KAAK,EAAE,UAAU,gBAAgB,CAAC;YAExC;AACL,aAAO;AACP,UAAI,aACF,UAAS,KAAK,EAAE,UAAU,iBAAiB,MAAM,EAAE,CAAC;;eAIxD,EAAE,WAAW,MAAM,EAAE,KAAK,YAC1B,EAAE,WAAW,MAAM,EAAE,KAAK,YAC1B,EAAE,UAAU,MAAM,GAAG,MAAM,EAAE,CAAC,aAAa,KAAK,SAChD;KAEA,MAAM,gBAAgB,EAAE,QAAQ,OAAO,IAAI;AAC3C,SAAI,kBAAkB,IAAI;AACxB,UAAI,OAAQ,OAAM,YAAY,yBAAyB;AACvD,eAAS,KAAK,EAAE,UAAU,MAAM,EAAE,CAAC;AACnC,YAAM,EAAE;YACH;AACL,eAAS,KAAK,EAAE,UAAU,MAAM,GAAG,cAAc,CAAC;AAClD,YAAM,gBAAgB;;AAExB;WACK;AAGL,YAAO;KACP,MAAM,eAAe,MAAM;AAC3B,YAAO,MAAM,EAAE,QAAQ;MACrB,MAAM,KAAK,EAAE,WAAW,IAAI;AAC5B,UAAI,MAAM,MAAM,OAAO,MAAM,OAAO,SAAU;AAC9C;;KAEF,MAAM,cAAc,EAAE,UAAU,cAAc,IAAI;KAGlD,IAAI,iBAAuD;AAC3D,YAAO,MAAM,EAAE,QAAQ;MACrB,MAAM,KAAK,EAAE,WAAW,IAAI;AAC5B,UAAI,OAAO,MAAM,OAAO,SAAU;AAElC,UAAI,MAAM,IAAI;AACZ;AACA;;AAGF,UAAI,OAAO,UAAU,OAAO,QAAQ;OAClC,MAAM,WAAW,EAAE,QA
AQ,OAAO,SAAS,MAAM,MAAK,MAAM,EAAE;AAC9D,WAAI,aAAa,IAAI;AACnB,YAAI,OAAQ,OAAM,YAAY,uBAAuB;AACrD,cAAM,EAAE;AACR;;OAEF,MAAM,QAAQ,EAAE,UAAU,MAAM,GAAG,SAAS;AAC5C,WAAI,mBAAmB,KACrB,kBAAiB,OAAO,OAAO,KAAK;AACtC,sBAAgB,SAAS;AACzB,aAAM,WAAW;AACjB;;MAGF,MAAM,aAAa;AACnB,aAAO,MAAM,EAAE,QAAQ;OACrB,MAAM,KAAK,EAAE,WAAW,IAAI;AAC5B,WAAI,MAAM,MAAM,OAAO,MAAM,OAAO,SAAU;AAC9C;;MAEF,MAAM,QAAQ,EAAE,UAAU,YAAY,IAAI;AAC1C,UAAI,mBAAmB,KAAM,kBAAiB,OAAO,OAAO,KAAK;AACjE,qBAAgB,SAAS;;AAI3B,SAAI,MAAM,EAAE,UAAU,EAAE,WAAW,IAAI,KAAK,UAAU;AACpD;MACA,IAAI,uBAAuB;AAC3B,aAAO,wBAAwB,MAAM,EAAE,QAAQ;AAC7C,WAAI,EAAE,WAAW,IAAI,KAAK,SACxB,wBAAuB;YAClB;QAEL,MAAM,gBAAgB,EAAE,WAAW,IAAI;AACvC,YAAI,kBAAkB,UAAU,kBAAkB,QAAQ;AACxD,eAAM,EAAE,QACN,kBAAkB,SAAS,MAAM,MACjC,MAAM,EACP;AACD,aAAI,QAAQ,IAAI;AACd,gBAAM,EAAE;AACR;;;;AAIN;;AAGF,aAAO,MAAM,EAAE,UAAU,EAAE,WAAW,IAAI,IAAI,GAAI;;AAGpD,SAAI,WAAW,OAAO,EAAE,UAAU,EAAE,WAAW,IAAI,KAAK,IACtD,OAAM,YAAY,uBAAuB;AAE3C,cAAS,KAAK;MACZ,SAAS;MACT,YAAY;MACZ,UAAU,EAAE;MACb,CAAU;;AAEb;AACA;;AAGF;GACA,MAAM,UAAU,WAAW;GAC3B,IAAI,aAAmD;AAGvD,UAAO,EAAE,WAAW,IAAI,KAAK,MAAM,EAAE,MAAM;IACzC,IAAI,WAAW,EAAE,WAAW,IAAI;AAChC,QACG,WAAW,MAAM,WAAW,MAC5B,WAAW,MAAM,WAAW,OAC7B,aAAa,cACb,aAAa,SACb,WAAW,KACX;KACA,MAAM,OAAO,WAAW;KACxB,IAAI,OAAO,EAAE,WAAW,IAAI;AAC5B,YACE,QACA,SAAS,UACT,SAAS,UACT,EACG,OAAO,MAAM,OAAO,MACpB,OAAO,MAAM,OAAO,OACrB,SAAS,cACT,SAAS,SACT,OAAO,QAET,SAAS,IACT;AACA;AACA,aAAO,EAAE,WAAW,IAAI;;KAE1B,IAAI;AACJ,SAAI,SAAS,UAAU,SAAS,QAAQ;AACtC,cAAQ,aAAa;AACrB,UAAI,QAAQ,IAAI;OAEd,MAAM,OAAc;QAAE;QAAS;QAAY,UAAU,EAAE;QAAE;AACzD,gBAAS,KAAK,KAAK;AAEnB,cAAO,MAAM,SAAS,GAAG;QACvB,MAAM,QAAQ,MAAM,KAAK;QACzB,MAAM,SAAgB;SACpB,SAAS;SACT,YAAY,MAAM;SAClB;SACD;AACD,yBAAiB,MAAM;AACvB,mBAAW,MAAM;AACjB,iBAAS,KAAK,OAAO;;AAEvB,cAAO;;AAET,UAAI,OAAQ,SAAQ,OAAO,MAAM;YAC5B;AACL,cAAQ;AACR;;AAEF,SAAI,eAAe,KAAM,cAAa,OAAO,OAAO,KAAK;AACzD,gBAAY,QAAQ;;AAEtB;;AAEF,OAAI,UAAU,CAAC,EAAE,KACf,OAAM,YAAY,iBAAiB,QAAQ,GAAG;AAIhD,OACE,EAAE,WAAW,MAAM,EAAE,KAAK,SAC1B,EAAE,WAAW,MAAM,EAAE,KAAK,YAC1B,QAAQ,WAAW,EAAE,KAAK,KAE1B,KAAI,kBAAkB,QAAQ,cA
Ac,IAAI,QAAQ,EAAE;IAExD,MAAM,cAAc,OAAO,UAAU;IACrC,MAAM,QAAQ,MAAM;AACpB,UAAM,EAAE,QAAQ,aAAa,MAAM;IACnC,IAAI;AACJ,QAAI,QAAQ,IAAI;AACd,SAAI,OAAQ,OAAM,YAAY,iBAAiB,QAAQ,GAAG;AAC1D,mBAAc,CAAC,EAAE,UAAU,MAAM,CAAC;AAClC,WAAM,EAAE;WACH;AACL,mBAAc,CAAC,EAAE,UAAU,OAAO,IAAI,CAAC;AACvC,YAAO,YAAY;;IAErB,MAAM,OAAc;KAAE;KAAS;KAAY,UAAU;KAAa;AAClE,aAAS,KAAK,KAAK;AACnB,QAAI,QAAQ,WAAW,EAAE,KAAK,UAAU;AACtC,cAAS,KAAK,GAAG,KAAK,SAAS;AAC/B,UAAK,WAAW,EAAE;;cAEX,mBAAmB,QAAQ,CAAC,eAAe,IAAI,QAAQ,EAAE;AAElE;AACA,UAAM,KAAK;KAAE,SAAS;KAAgB;KAAY;KAAU,CAAC;AAC7D,qBAAiB;AACjB,eAAW,EAAE;UACR;AAEL;IACA,MAAM,OAAc;KAAE;KAAS;KAAY,UAAU,EAAE;KAAE;AACzD,aAAS,KAAK,KAAK;AACnB,QAAI,QAAQ,WAAW,EAAE,KAAK,UAAU;AACtC,cAAS,KAAK,GAAG,KAAK,SAAS;AAC/B,UAAK,WAAW,EAAE;;;QAGjB;AAEL;IACA,MAAM,OAAc;KAAE;KAAS;KAAY,UAAU,EAAE;KAAE;AACzD,aAAS,KAAK,KAAK;AACnB,QAAI,QAAQ,WAAW,EAAE,KAAK,UAAU;AACtC,cAAS,KAAK,GAAG,KAAK,SAAS;AAC/B,UAAK,WAAW,EAAE;;;SAGjB;GACL,IAAI,OAAO,WAAW;AACtB,OAAI,OAAQ,QAAO,OAAO,KAAK;AAC/B,OAAI,gBAAgB;IAClB,MAAM,UAAU,KAAK,MAAM;AAC3B,QAAI,QAAQ,SAAS,EACnB,UAAS,KAAK,QAAQ;cAGpB,KAAK,SAAS,EAChB,UAAS,KAAK,KAAK;AAGvB;;AAIJ,MAAI,UAAU,mBAAmB,GAC/B,OAAM,YAAY,iBAAiB,eAAe,GAAG;AAEvD,2BAAyB,SAAS;AAElC,SAAO,MAAM,SAAS,GAAG;GACvB,MAAM,QAAQ,MAAM,KAAK;GACzB,MAAM,OAAc;IAClB,SAAS;IACT,YAAY,MAAM;IAClB;IACD;AACD,oBAAiB,MAAM;AACvB,cAAW,MAAM;AACjB,YAAS,KAAK,KAAK;AACnB,4BAAyB,SAAS;;AAEpC,SAAO;;CAGT,SAAS,YAAoB;EAC3B,MAAM,QAAQ;AACd,QAAM,EAAE,QAAQ,KAAK,IAAI,GAAG;AAC5B,MAAI,QAAQ,GAAI,OAAM,EAAE;AACxB,SAAO,EAAE,UAAU,OAAO,MAAM,EAAE;;CAGpC,SAAS,YAAoB;EAC3B,MAAM,QAAQ;EACd,IAAI,WAAW,EAAE,WAAW,IAAI;AAChC,SACE,WAAW,MACP,SAAS,cAAc,IACvB,aAAa,SAEjB,YAAW,EAAE,WAAW,EAAE,IAAI;AAEhC,SAAO,EAAE,UAAU,OAAO,IAAI;;CAGhC,SAAS,YAAmB;AAC1B;EACA,MAAM,UAAU,WAAW;EAE3B,IAAI,aAAmD;EACvD,IAAI,WAA+B,EAAE;AAGrC,SAAO,EAAE,WAAW,IAAI,KAAK,MAAM,EAAE,MAAM;GACzC,IAAI,WAAW,EAAE,WAAW,IAAI;AAEhC,OACG,WAAW,MAAM,WAAW,MAC5B,WAAW,MAAM,WAAW,OAC7B,aAAa,cACb,aAAa,SACb,WAAW,KACX;IACA,MAAM,OAAO,WAAW;IAExB,IAAI,OAAO,EAAE,WAAW,IAAI;AAC5B,WACE,QACA,SAAS,UACT,SAAS,UACT,EACG,OAAO,MAAM,OAAO,MACpB,
OAAO,MAAM,OAAO,OACrB,SAAS,cACT,SAAS,SACT,OAAO,QAET,SAAS,IACT;AACA;AACA,YAAO,EAAE,WAAW,IAAI;;IAE1B,IAAI;AACJ,QAAI,SAAS,UAAU,SAAS,QAAQ;AACtC,aAAQ,aAAa;AACrB,SAAI,QAAQ,GACV,QAAO;MAAE;MAAS;MAAY;MAAU;AAE1C,SAAI,OAAQ,SAAQ,OAAO,MAAM;WAC5B;AACL,aAAQ;AACR;;AAGF,QAAI,eAAe,KAAM,cAAa,OAAO,OAAO,KAAK;AACzD,eAAY,QAAQ;;AAEtB;;AAEF,MAAI,UAAU,CAAC,EAAE,KACf,OAAM,YAAY,iBAAiB,QAAQ,GAAG;AAIhD,MACE,EAAE,WAAW,MAAM,EAAE,KAAK,SAC1B,EAAE,WAAW,MAAM,EAAE,KAAK,YAC1B,QAAQ,WAAW,EAAE,KAAK,KAE1B,KAAI,kBAAkB,QAAQ,cAAc,IAAI,QAAQ,EAAE;GAExD,MAAM,WAAW,OAAO,UAAU;GAClC,MAAM,QAAQ,MAAM;AACpB,SAAM,EAAE,QAAQ,UAAU,MAAM;AAChC,OAAI,QAAQ,IAAI;AACd,QAAI,OAAQ,OAAM,YAAY,iBAAiB,QAAQ,GAAG;AAE1D,eAAW,CAAC,EAAE,UAAU,MAAM,CAAC;AAC/B,UAAM,EAAE;UACH;AACL,eAAW,CAAC,EAAE,UAAU,OAAO,IAAI,CAAC;AACpC,WAAO,SAAS;;aAET,mBAAmB,QAAQ,CAAC,eAAe,IAAI,QAAQ,EAAE;AAClE;AACA,cAAW,cAAc,QAAQ;QAEjC;MAGF;AAEF,SAAO;GAAE;GAAS;GAAY;GAAU;;CAG1C,SAAS,cAAsB;EAC7B,MAAM,gBAAgB,EAAE,WAAW,IAAI;EACvC,MAAM,gBAAgB,MAAM;AAC5B,QAAM,EAAE,QAAQ,kBAAkB,SAAS,MAAM,MAAK,cAAc;AACpE,SAAO,EAAE,UAAU,eAAe,IAAI;;CAGxC,SAAS,eAAuB;AAC9B,MAAI,CAAC,gBAAgB,YAAY,CAAC,gBAAgB,UAAW,QAAO;EACpE,MAAM,cAAc,IAAI,OACtB,QACE,aAAa,gBAAgB,SAAS,GACtC,eACA,aAAa,gBAAgB,UAAU,GACvC,QACH,CAAC,KAAK,EAAE;AACT,MAAI,YACF,QAAO,YAAY;MAEnB,QAAO;;CAIX,IAAI;AAEJ,KAAI,gBAAgB,cAAc,KAAA,GAAW;AAC3C,kBAAgB,WAAW,gBAAgB,YAAY;EACvD,MAAM,UAA8B,EAAE;AAEtC,UAAQ,MAAM,cAAc,MAAM,IAAI;AACpC,SAAM,EAAE,YAAY,KAAK,IAAI;AAC7B,OAAI,QAAQ,GACV,SAAQ,KAAK,WAAW,CAAC;AAE3B,OAAI,EAAE,MAAM,IAAI;AAChB,SAAM;;AAER,QAAM;YACG,gBAAgB,UACzB,OAAM,WAAW;KAEjB,OAAM,cAAc,GAAG;AAGzB,KAAI,gBAAgB,UAAU,MAAM,QAAQ,IAAI,CAC9C,OAAM,OAAO,KAAK,gBAAgB,OAAO;AAG3C,KACE,gBAAgB,UAChB,OAAO,QAAQ,YACf,CAAC,MAAM,QAAQ,IAAI,CAElB,KAAqB,MAAM;AAG9B,QAAO"}
|
package/dist/parser.mjs
ADDED
package/dist/sax.d.mts
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
//#region src/saxEngine.d.ts
|
|
2
|
+
/**
|
|
3
|
+
* saxEngine — a high-performance, synchronous, event-based streaming XML parser.
|
|
4
|
+
*
|
|
5
|
+
* This is an internal module used by `createSaxParser` and `XmlParseStream`.
|
|
6
|
+
* It is not part of the public API.
|
|
7
|
+
*
|
|
8
|
+
* Architecture: single-pass state machine with batch scanning. Each character is
|
|
9
|
+
* consumed exactly once. Within a chunk, hot-path states (text, tag names,
|
|
10
|
+
* attribute names/values, close tags) scan ahead with indexOf / charCodeAt loops
|
|
11
|
+
* to extract tokens via a single substring() rather than per-character +=.
|
|
12
|
+
*/
|
|
13
|
+
/** Attributes record emitted with opentag events. */
|
|
14
|
+
type Attributes = Record<string, string | null>;
|
|
15
|
+
//#endregion
|
|
16
|
+
//#region src/sax.d.ts
|
|
17
|
+
/** Event name → handler signature map for the SAX parser. */
|
|
18
|
+
interface SaxEventMap {
|
|
19
|
+
openTag: (tagName: string, attributes: Attributes) => void;
|
|
20
|
+
closeTag: (tagName: string) => void;
|
|
21
|
+
text: (text: string) => void;
|
|
22
|
+
cdata: (data: string) => void;
|
|
23
|
+
comment: (comment: string) => void;
|
|
24
|
+
processingInstruction: (name: string, body: string) => void;
|
|
25
|
+
doctype: (tagName: string, attributes: Attributes) => void;
|
|
26
|
+
}
|
|
27
|
+
/** SAX event names. */
|
|
28
|
+
type SaxEventName = keyof SaxEventMap;
|
|
29
|
+
/** EventEmitter-style SAX parser returned by `createSaxParser()`. */
|
|
30
|
+
interface SaxParser {
|
|
31
|
+
/** Register an event handler. */
|
|
32
|
+
on<E extends SaxEventName>(event: E, handler: SaxEventMap[E]): void;
|
|
33
|
+
/** Remove an event handler. */
|
|
34
|
+
off<E extends SaxEventName>(event: E, handler: SaxEventMap[E]): void;
|
|
35
|
+
/** Feed a chunk of XML to the parser. */
|
|
36
|
+
write(chunk: string): void;
|
|
37
|
+
/** Signal end-of-input and flush any remaining buffered data. */
|
|
38
|
+
close(): void;
|
|
39
|
+
}
|
|
40
|
+
/** Options for `createSaxParser()`. */
|
|
41
|
+
interface SaxParserOptions {
|
|
42
|
+
/**
|
|
43
|
+
* Enable HTML mode. Sets `selfClosingTags` and `rawContentTags` to their
|
|
44
|
+
* HTML defaults unless explicitly provided.
|
|
45
|
+
*/
|
|
46
|
+
html?: boolean;
|
|
47
|
+
/** Tag names that are self-closing (void). Defaults to HTML voids when `html: true`. */
|
|
48
|
+
selfClosingTags?: string[];
|
|
49
|
+
/** Tag names whose content is raw text. Defaults to `['script', 'style']` when `html: true`. */
|
|
50
|
+
rawContentTags?: string[];
|
|
51
|
+
}
|
|
52
|
+
/**
|
|
53
|
+
* Create an EventEmitter-style SAX parser.
|
|
54
|
+
*
|
|
55
|
+
* Uses the internal SAX engine, but wraps it with `.on()` / `.off()` methods
|
|
56
|
+
* so handlers can be added and removed dynamically.
|
|
57
|
+
*
|
|
58
|
+
* @param options - Parser options (html mode, selfClosingTags, rawContentTags).
|
|
59
|
+
* @returns A SaxParser with `.on()`, `.off()`, `.write()`, `.close()`.
|
|
60
|
+
*/
|
|
61
|
+
declare function createSaxParser(options?: SaxParserOptions): SaxParser;
|
|
62
|
+
//#endregion
|
|
63
|
+
export { SaxEventMap, SaxEventName, SaxParser, SaxParserOptions, createSaxParser };
|
|
64
|
+
//# sourceMappingURL=sax.d.mts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sax.d.mts","names":[],"sources":["../src/saxEngine.ts","../src/sax.ts"],"mappings":";;AAsCA;;;;;;;;ACNA;;;KDMY,UAAA,GAAa,MAAA;;;;UCNR,WAAA;EACf,OAAA,GAAU,OAAA,UAAiB,UAAA,EAAY,UAAA;EACvC,QAAA,GAAW,OAAA;EACX,IAAA,GAAO,IAAA;EACP,KAAA,GAAQ,IAAA;EACR,OAAA,GAAU,OAAA;EACV,qBAAA,GAAwB,IAAA,UAAc,IAAA;EACtC,OAAA,GAAU,OAAA,UAAiB,UAAA,EAAY,UAAA;AAAA;;KAI7B,YAAA,SAAqB,WAAA;;UAGhB,SAAA;EAHL;EAKV,EAAA,WAAa,YAAA,EAAc,KAAA,EAAO,CAAA,EAAG,OAAA,EAAS,WAAA,CAAY,CAAA;;EAE1D,GAAA,WAAc,YAAA,EAAc,KAAA,EAAO,CAAA,EAAG,OAAA,EAAS,WAAA,CAAY,CAAA;EAPjB;EAS1C,KAAA,CAAM,KAAA;EANkB;EAQxB,KAAA;AAAA;;UAQe,gBAAA;EAd2C;;;;EAmB1D,IAAA;EAjB4D;EAmB5D,eAAA;EArBA;EAuBA,cAAA;AAAA;;;;;;;;;;iBAgBc,eAAA,CAAgB,OAAA,GAAU,gBAAA,GAAmB,SAAA"}
|