xml-sax-ts 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +127 -1
- package/dist/index.cjs +421 -197
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +30 -13
- package/dist/index.d.ts +30 -13
- package/dist/index.js +421 -197
- package/dist/index.js.map +1 -1
- package/package.json +6 -1
package/dist/index.js
CHANGED
|
@@ -21,49 +21,125 @@ var XmlSaxError = class extends Error {
|
|
|
21
21
|
};
|
|
22
22
|
|
|
23
23
|
// src/entities.ts
|
|
24
|
-
var NAMED_ENTITIES = {
|
|
25
|
-
lt: "<",
|
|
26
|
-
gt: ">",
|
|
27
|
-
amp: "&",
|
|
28
|
-
quot: '"',
|
|
29
|
-
apos: "'"
|
|
30
|
-
};
|
|
31
24
|
/**
 * Replaces every entity reference (`&name;`, `&#NN;`, `&#xHH;`) in `input`
 * with the text it stands for.
 *
 * Named references are resolved by `decodeNamedEntity`; numeric references are
 * parsed by `parseNumericEntity` and converted by `decodeCodePoint`.
 *
 * @param {string} input - Text that may contain entity references.
 * @param {(error: XmlSaxError) => void} [onError] - Invoked with the error
 *   immediately before it is thrown.
 * @returns {string} The decoded text. When `input` contains no `&` the very
 *   same string is returned without copying.
 * @throws {XmlSaxError} "Unterminated entity" when a `&` has no following `;`,
 *   or "Unknown entity: &...;" when the reference cannot be resolved. The index
 *   of the offending `&` within `input` is passed as the error offset; line and
 *   column are passed as 0.
 */
function decodeEntities(input, onError) {
  let amp = input.indexOf("&");
  if (amp === -1) {
    return input;
  }
  let result = "";
  let i = 0;
  // Each pass handles exactly one reference; `amp` always points at its "&".
  while (amp !== -1) {
    // Literal run between the previous reference and this one (may be empty).
    result += input.slice(i, amp);
    const semi = input.indexOf(";", amp + 1);
    if (semi === -1) {
      const err = new XmlSaxError("Unterminated entity", amp, 0, 0);
      onError?.(err);
      throw err;
    }
    let decoded;
    if (input[amp + 1] === "#") {
      const numeric = parseNumericEntity(input, amp + 2, semi);
      decoded = numeric === void 0 ? void 0 : decodeCodePoint(numeric);
    } else {
      decoded = decodeNamedEntity(input, amp + 1, semi);
    }
    if (decoded === void 0) {
      const entity = input.slice(amp + 1, semi);
      const err = new XmlSaxError(`Unknown entity: &${entity};`, amp, 0, 0);
      onError?.(err);
      throw err;
    }
    result += decoded;
    i = semi + 1;
    amp = input.indexOf("&", i);
  }
  // Whatever follows the last reference is plain text.
  return result + input.slice(i);
}
|
|
67
|
+
/**
 * Resolves one of the five predefined XML entity names found in
 * `input[start, end)`.
 *
 * @param {string} input - String containing the entity name.
 * @param {number} start - Index of the first character of the name.
 * @param {number} end - Index just past the last character of the name.
 * @returns {string | undefined} The replacement character for `lt`, `gt`,
 *   `amp`, `quot` or `apos`; `undefined` for anything else.
 */
function decodeNamedEntity(input, start, end) {
  const name = input.substring(start, end);
  // A length mismatch means the requested range was not fully inside `input`
  // (or was empty/inverted), so there is no name to match.
  if (name.length !== end - start) {
    return void 0;
  }
  switch (name) {
    case "lt":
      return "<";
    case "gt":
      return ">";
    case "amp":
      return "&";
    case "quot":
      return '"';
    case "apos":
      return "'";
    default:
      return void 0;
  }
}
|
|
96
|
+
/**
 * Parses the digits of a numeric character reference located in
 * `input[start, end)`, i.e. the part after `&#` and before `;`.
 *
 * A leading `x` or `X` selects hexadecimal; otherwise the digits are decimal.
 *
 * @param {string} input - String containing the reference.
 * @param {number} start - Index of the first character after `&#`.
 * @param {number} end - Index of the terminating `;`.
 * @returns {number | undefined} The numeric value, or `undefined` when the
 *   range is empty, has no digits after the hex marker, runs past the end of
 *   `input`, or contains a character that is not a digit of the radix.
 */
function parseNumericEntity(input, start, end) {
  if (start >= end) {
    return void 0;
  }
  const lead = input[start];
  const isHex = lead === "x" || lead === "X";
  const digitsStart = isHex ? start + 1 : start;
  if (digitsStart >= end) {
    return void 0;
  }
  const radix = isHex ? 16 : 10;
  const toDigit = isHex ? hexDigit : decimalDigit;
  let value = 0;
  let pos = digitsStart;
  while (pos < end) {
    const ch = input[pos];
    if (ch === void 0) {
      return void 0;
    }
    const digit = toDigit(ch);
    if (digit === -1) {
      return void 0;
    }
    value = value * radix + digit;
    pos += 1;
  }
  return value;
}
|
|
124
|
+
/**
 * Converts a single character to its decimal digit value.
 *
 * @param {string} ch - The character to convert (only its first UTF-16 unit is read).
 * @returns {number} 0-9 for `"0"`-`"9"`, otherwise -1.
 */
function decimalDigit(ch) {
  // 48 is the char code of "0".
  const digit = ch.charCodeAt(0) - 48;
  return digit < 0 || digit > 9 ? -1 : digit;
}
|
|
131
|
+
/**
 * Converts a single character to its hexadecimal digit value.
 *
 * @param {string} ch - The character to convert (only its first UTF-16 unit is read).
 * @returns {number} 0-15 for `0-9`, `A-F` or `a-f`, otherwise -1.
 */
function hexDigit(ch) {
  const unit = ch.charCodeAt(0);
  const isDecimal = unit >= 48 && unit <= 57;
  if (isDecimal) {
    return unit - 48;
  }
  // Setting bit 5 maps "A"-"F" (65-70) onto "a"-"f" (97-102); no other code
  // unit lands in that range, so one comparison covers both letter cases.
  const lower = unit | 32;
  const isLetter = lower >= 97 && lower <= 102;
  return isLetter ? lower - 87 : -1;
}
|
|
68
144
|
function decodeCodePoint(codePoint) {
|
|
69
145
|
if (!Number.isFinite(codePoint)) {
|
|
@@ -82,8 +158,7 @@ function splitTextForEntities(text) {
|
|
|
82
158
|
if (lastAmp === -1) {
|
|
83
159
|
return { emit: text, carry: "" };
|
|
84
160
|
}
|
|
85
|
-
|
|
86
|
-
if (nextSemi === -1) {
|
|
161
|
+
if (!text.includes(";", lastAmp + 1)) {
|
|
87
162
|
return {
|
|
88
163
|
emit: text.slice(0, lastAmp),
|
|
89
164
|
carry: text.slice(lastAmp)
|
|
@@ -96,10 +171,32 @@ function splitTextForEntities(text) {
|
|
|
96
171
|
// Option values applied when the caller does not override them.
var DEFAULT_OPTIONS = {
  xmlns: true,
  includeNamespaceAttributes: false,
  allowDoctype: true,
  coalesceText: false,
  trackPosition: true
};
var XML_NAMESPACE_URI = "http://www.w3.org/XML/1998/namespace";
var XMLNS_NAMESPACE_URI = "http://www.w3.org/2000/xmlns/";
// Shared regex instances so they are not re-created on every call.
var WHITESPACE_RE = /\s/;
var CRLF_RE = /\r\n?/g;
// Lookup tables indexed by char code (0-127): a 1 marks a code that may start
// a name (NAME_START_TABLE) or appear anywhere in a name (NAME_CHAR_TABLE).
var NAME_START_TABLE = new Uint8Array(128);
var NAME_CHAR_TABLE = new Uint8Array(128);
for (const table of [NAME_START_TABLE, NAME_CHAR_TABLE]) {
  table.fill(1, 65, 91); // "A"-"Z"
  table.fill(1, 97, 123); // "a"-"z"
  table[95] = 1; // "_"
}
NAME_CHAR_TABLE.fill(1, 48, 58); // "0"-"9"
for (const ch of ":-.") {
  NAME_CHAR_TABLE[ch.charCodeAt(0)] = 1;
}
|
|
103
200
|
var XmlSaxParser = class {
|
|
104
201
|
constructor(options = {}) {
|
|
105
202
|
this.buffer = "";
|
|
@@ -115,7 +212,22 @@ var XmlSaxParser = class {
|
|
|
115
212
|
];
|
|
116
213
|
this.closed = false;
|
|
117
214
|
this.pendingCR = false;
|
|
118
|
-
this.
|
|
215
|
+
this.pendingTextParts = [];
|
|
216
|
+
this._rawAttrs = [];
|
|
217
|
+
const resolved = { ...DEFAULT_OPTIONS, ...options };
|
|
218
|
+
this.xmlns = resolved.xmlns;
|
|
219
|
+
this.includeNamespaceAttributes = resolved.includeNamespaceAttributes;
|
|
220
|
+
this.allowDoctype = resolved.allowDoctype;
|
|
221
|
+
this.coalesceText = resolved.coalesceText;
|
|
222
|
+
this.trackPosition = resolved.trackPosition;
|
|
223
|
+
this.onOpenTag = resolved.onOpenTag;
|
|
224
|
+
this.onCloseTag = resolved.onCloseTag;
|
|
225
|
+
this.onText = resolved.onText;
|
|
226
|
+
this.onCdata = resolved.onCdata;
|
|
227
|
+
this.onComment = resolved.onComment;
|
|
228
|
+
this.onProcessingInstruction = resolved.onProcessingInstruction;
|
|
229
|
+
this.onDoctype = resolved.onDoctype;
|
|
230
|
+
this.onError = resolved.onError;
|
|
119
231
|
}
|
|
120
232
|
feed(chunk) {
|
|
121
233
|
if (this.closed) {
|
|
@@ -133,6 +245,7 @@ var XmlSaxParser = class {
|
|
|
133
245
|
}
|
|
134
246
|
this._parseBuffer(true);
|
|
135
247
|
this._flushPendingCR();
|
|
248
|
+
this._flushTextBuffer();
|
|
136
249
|
if (this.buffer.length > 0) {
|
|
137
250
|
this._error("Unexpected end of input");
|
|
138
251
|
}
|
|
@@ -147,12 +260,21 @@ var XmlSaxParser = class {
|
|
|
147
260
|
const lt = this.buffer.indexOf("<", i);
|
|
148
261
|
if (lt === -1) {
|
|
149
262
|
const tail = this.buffer.slice(i);
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
263
|
+
if (!tail.includes("&")) {
|
|
264
|
+
if (tail.length > 0) {
|
|
265
|
+
this._emitText(tail, true);
|
|
266
|
+
this._advance(tail);
|
|
267
|
+
}
|
|
268
|
+
} else {
|
|
269
|
+
const split = splitTextForEntities(tail);
|
|
270
|
+
if (split.emit.length > 0) {
|
|
271
|
+
this._emitText(split.emit, true);
|
|
272
|
+
this._advance(split.emit);
|
|
273
|
+
}
|
|
274
|
+
this.buffer = split.carry;
|
|
275
|
+
return;
|
|
154
276
|
}
|
|
155
|
-
this.buffer =
|
|
277
|
+
this.buffer = "";
|
|
156
278
|
return;
|
|
157
279
|
}
|
|
158
280
|
if (lt > i) {
|
|
@@ -167,8 +289,7 @@ var XmlSaxParser = class {
|
|
|
167
289
|
if (consumed === null) {
|
|
168
290
|
break;
|
|
169
291
|
}
|
|
170
|
-
|
|
171
|
-
this._advance(markup);
|
|
292
|
+
this._advanceSpan(lt, lt + consumed);
|
|
172
293
|
i = lt + consumed;
|
|
173
294
|
}
|
|
174
295
|
this.buffer = this.buffer.slice(i);
|
|
@@ -178,35 +299,9 @@ var XmlSaxParser = class {
|
|
|
178
299
|
}
|
|
179
300
|
_parseMarkupFrom(start, final) {
|
|
180
301
|
assert(this.buffer[start] === "<", "Markup must start with '<'");
|
|
181
|
-
this._flushPendingCR();
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
if (end === -1) {
|
|
185
|
-
if (final) {
|
|
186
|
-
this._error("Unterminated comment");
|
|
187
|
-
}
|
|
188
|
-
return null;
|
|
189
|
-
}
|
|
190
|
-
const comment = this.buffer.slice(start + 4, end);
|
|
191
|
-
this.options.onComment?.(comment);
|
|
192
|
-
return end + 3 - start;
|
|
193
|
-
}
|
|
194
|
-
if (this.buffer.startsWith("<![CDATA[", start)) {
|
|
195
|
-
const end = this.buffer.indexOf("]]>", start + 9);
|
|
196
|
-
if (end === -1) {
|
|
197
|
-
if (final) {
|
|
198
|
-
this._error("Unterminated CDATA section");
|
|
199
|
-
}
|
|
200
|
-
return null;
|
|
201
|
-
}
|
|
202
|
-
const cdata = this.buffer.slice(start + 9, end);
|
|
203
|
-
const normalized = this._normalizeText(cdata, false);
|
|
204
|
-
if (normalized.length > 0) {
|
|
205
|
-
this.options.onCdata?.(normalized);
|
|
206
|
-
}
|
|
207
|
-
return end + 3 - start;
|
|
208
|
-
}
|
|
209
|
-
if (this.buffer.startsWith("<?", start)) {
|
|
302
|
+
if (this.pendingCR) this._flushPendingCR();
|
|
303
|
+
const secondCode = this.buffer.charCodeAt(start + 1);
|
|
304
|
+
if (secondCode === 63) {
|
|
210
305
|
const end = this.buffer.indexOf("?>", start + 2);
|
|
211
306
|
if (end === -1) {
|
|
212
307
|
if (final) {
|
|
@@ -215,30 +310,64 @@ var XmlSaxParser = class {
|
|
|
215
310
|
return null;
|
|
216
311
|
}
|
|
217
312
|
const body = this.buffer.slice(start + 2, end).trim();
|
|
218
|
-
const split = body.search(
|
|
313
|
+
const split = body.search(WHITESPACE_RE);
|
|
219
314
|
const target = split === -1 ? body : body.slice(0, split);
|
|
220
315
|
const data = split === -1 ? "" : body.slice(split).trim();
|
|
221
316
|
const pi = { target, body: data };
|
|
222
|
-
this.
|
|
317
|
+
this._flushTextBuffer();
|
|
318
|
+
this.onProcessingInstruction?.(pi);
|
|
223
319
|
return end + 2 - start;
|
|
224
320
|
}
|
|
225
|
-
if (
|
|
226
|
-
const
|
|
227
|
-
if (
|
|
228
|
-
|
|
229
|
-
|
|
321
|
+
if (secondCode === 33) {
|
|
322
|
+
const thirdCode = this.buffer.charCodeAt(start + 2);
|
|
323
|
+
if (thirdCode === 45 && this.buffer.charCodeAt(start + 3) === 45) {
|
|
324
|
+
const end = this.buffer.indexOf("-->", start + 4);
|
|
325
|
+
if (end === -1) {
|
|
326
|
+
if (final) {
|
|
327
|
+
this._error("Unterminated comment");
|
|
328
|
+
}
|
|
329
|
+
return null;
|
|
230
330
|
}
|
|
231
|
-
|
|
331
|
+
const comment = this.buffer.slice(start + 4, end);
|
|
332
|
+
this._flushTextBuffer();
|
|
333
|
+
this.onComment?.(comment);
|
|
334
|
+
return end + 3 - start;
|
|
232
335
|
}
|
|
233
|
-
if (
|
|
234
|
-
this.
|
|
336
|
+
if (thirdCode === 91 && this.buffer.startsWith("<![CDATA[", start)) {
|
|
337
|
+
const end = this.buffer.indexOf("]]>", start + 9);
|
|
338
|
+
if (end === -1) {
|
|
339
|
+
if (final) {
|
|
340
|
+
this._error("Unterminated CDATA section");
|
|
341
|
+
}
|
|
342
|
+
return null;
|
|
343
|
+
}
|
|
344
|
+
const cdata = this.buffer.slice(start + 9, end);
|
|
345
|
+
const normalized = this._normalizeText(cdata, false);
|
|
346
|
+
if (normalized.length > 0) {
|
|
347
|
+
this._flushTextBuffer();
|
|
348
|
+
this.onCdata?.(normalized);
|
|
349
|
+
}
|
|
350
|
+
return end + 3 - start;
|
|
351
|
+
}
|
|
352
|
+
if (thirdCode === 68 && this.buffer.startsWith("<!DOCTYPE", start)) {
|
|
353
|
+
const end = this._findDoctypeEnd(start + 9);
|
|
354
|
+
if (end === -1) {
|
|
355
|
+
if (final) {
|
|
356
|
+
this._error("Unterminated doctype declaration");
|
|
357
|
+
}
|
|
358
|
+
return null;
|
|
359
|
+
}
|
|
360
|
+
if (!this.allowDoctype) {
|
|
361
|
+
this._error("Doctype is not allowed");
|
|
362
|
+
}
|
|
363
|
+
const raw = this.buffer.slice(start + 9, end).trim();
|
|
364
|
+
const doctype = { raw };
|
|
365
|
+
this._flushTextBuffer();
|
|
366
|
+
this.onDoctype?.(doctype);
|
|
367
|
+
return end + 1 - start;
|
|
235
368
|
}
|
|
236
|
-
const raw = this.buffer.slice(start + 9, end).trim();
|
|
237
|
-
const doctype = { raw };
|
|
238
|
-
this.options.onDoctype?.(doctype);
|
|
239
|
-
return end + 1 - start;
|
|
240
369
|
}
|
|
241
|
-
if (
|
|
370
|
+
if (secondCode === 47) {
|
|
242
371
|
const end = this.buffer.indexOf(">", start + 2);
|
|
243
372
|
if (end === -1) {
|
|
244
373
|
if (final) {
|
|
@@ -246,12 +375,13 @@ var XmlSaxParser = class {
|
|
|
246
375
|
}
|
|
247
376
|
return null;
|
|
248
377
|
}
|
|
249
|
-
|
|
250
|
-
const parsed = this._parseName(
|
|
251
|
-
|
|
378
|
+
let i = this._skipWhitespace(this.buffer, start + 2, end);
|
|
379
|
+
const parsed = this._parseName(this.buffer, i, end);
|
|
380
|
+
i = this._skipWhitespace(this.buffer, parsed.end, end);
|
|
381
|
+
if (i !== end) {
|
|
252
382
|
this._error("Invalid closing tag");
|
|
253
383
|
}
|
|
254
|
-
this._handleCloseTag(parsed.name);
|
|
384
|
+
this._handleCloseTag(parsed.name, parsed.end);
|
|
255
385
|
return end + 1 - start;
|
|
256
386
|
}
|
|
257
387
|
const tagEnd = this._findTagEnd(start + 1);
|
|
@@ -261,30 +391,54 @@ var XmlSaxParser = class {
|
|
|
261
391
|
}
|
|
262
392
|
return null;
|
|
263
393
|
}
|
|
264
|
-
|
|
265
|
-
this._handleStartTag(content);
|
|
394
|
+
this._handleStartTagRange(start + 1, tagEnd);
|
|
266
395
|
return tagEnd + 1 - start;
|
|
267
396
|
}
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
const
|
|
271
|
-
const
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
ns = Object.create(ns);
|
|
397
|
+
_handleStartTagRange(start, end) {
|
|
398
|
+
this._flushTextBuffer();
|
|
399
|
+
const parsed = this._parseStartTagRange(start, end);
|
|
400
|
+
const selfClosing = parsed.selfClosing;
|
|
401
|
+
if (!this.xmlns) {
|
|
402
|
+
const plainName = parsed.name;
|
|
403
|
+
const attributes2 = /* @__PURE__ */ Object.create(null);
|
|
276
404
|
for (const attr of parsed.attributes) {
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
405
|
+
attributes2[attr.name] = attr.value;
|
|
406
|
+
}
|
|
407
|
+
const tag2 = {
|
|
408
|
+
name: plainName,
|
|
409
|
+
attributes: attributes2,
|
|
410
|
+
isSelfClosing: selfClosing
|
|
411
|
+
};
|
|
412
|
+
this.onOpenTag?.(tag2);
|
|
413
|
+
if (selfClosing) {
|
|
414
|
+
this.onCloseTag?.({ name: plainName });
|
|
415
|
+
return;
|
|
416
|
+
}
|
|
417
|
+
this.elementStack.push({
|
|
418
|
+
rawName: parsed.name,
|
|
419
|
+
closeTag: { name: plainName }
|
|
420
|
+
});
|
|
421
|
+
return;
|
|
422
|
+
}
|
|
423
|
+
const parentNs = this._currentNs();
|
|
424
|
+
let ns = parentNs;
|
|
425
|
+
for (const attr of parsed.attributes) {
|
|
426
|
+
if (attr.name === "xmlns") {
|
|
427
|
+
if (ns === parentNs) {
|
|
428
|
+
ns = Object.create(parentNs);
|
|
429
|
+
}
|
|
430
|
+
ns[""] = attr.value;
|
|
431
|
+
} else if (attr.name.startsWith("xmlns:")) {
|
|
432
|
+
if (ns === parentNs) {
|
|
433
|
+
ns = Object.create(parentNs);
|
|
281
434
|
}
|
|
435
|
+
ns[attr.name.slice(6)] = attr.value;
|
|
282
436
|
}
|
|
283
437
|
}
|
|
284
438
|
const resolvedName = this._resolveName(parsed.name, ns);
|
|
285
|
-
const attributes =
|
|
439
|
+
const attributes = /* @__PURE__ */ Object.create(null);
|
|
286
440
|
for (const attr of parsed.attributes) {
|
|
287
|
-
if (
|
|
441
|
+
if (!this.includeNamespaceAttributes) {
|
|
288
442
|
if (attr.name === "xmlns" || attr.name.startsWith("xmlns:")) {
|
|
289
443
|
continue;
|
|
290
444
|
}
|
|
@@ -306,86 +460,118 @@ var XmlSaxParser = class {
|
|
|
306
460
|
attributes,
|
|
307
461
|
isSelfClosing: selfClosing
|
|
308
462
|
};
|
|
309
|
-
this.
|
|
463
|
+
this.onOpenTag?.(tag);
|
|
310
464
|
if (selfClosing) {
|
|
311
|
-
|
|
465
|
+
this.onCloseTag?.({
|
|
312
466
|
name: resolvedName.name,
|
|
313
467
|
prefix: resolvedName.prefix,
|
|
314
468
|
local: resolvedName.local,
|
|
315
469
|
uri: resolvedName.uri
|
|
316
|
-
};
|
|
317
|
-
this.options.onCloseTag?.(closeTag);
|
|
470
|
+
});
|
|
318
471
|
return;
|
|
319
472
|
}
|
|
320
|
-
this.elementStack.push({
|
|
473
|
+
this.elementStack.push({
|
|
474
|
+
rawName: parsed.name,
|
|
475
|
+
closeTag: {
|
|
476
|
+
name: resolvedName.name,
|
|
477
|
+
prefix: resolvedName.prefix,
|
|
478
|
+
local: resolvedName.local,
|
|
479
|
+
uri: resolvedName.uri
|
|
480
|
+
}
|
|
481
|
+
});
|
|
321
482
|
this.nsStack.push(ns);
|
|
322
483
|
}
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
const
|
|
326
|
-
if (!entry || !ns) {
|
|
327
|
-
this._error("Closing tag without matching start tag");
|
|
328
|
-
}
|
|
329
|
-
if (entry.rawName !== rawName) {
|
|
330
|
-
this._error(`Mismatched closing tag: expected </${entry.rawName}>`);
|
|
331
|
-
}
|
|
332
|
-
const closeTag = {
|
|
333
|
-
name: entry.resolved.name,
|
|
334
|
-
prefix: entry.resolved.prefix,
|
|
335
|
-
local: entry.resolved.local,
|
|
336
|
-
uri: entry.resolved.uri
|
|
337
|
-
};
|
|
338
|
-
this.options.onCloseTag?.(closeTag);
|
|
339
|
-
}
|
|
340
|
-
_parseTagBody(body) {
|
|
341
|
-
let i = 0;
|
|
342
|
-
const length = body.length;
|
|
343
|
-
i = this._skipWhitespace(body, i, length);
|
|
344
|
-
const parsedName = this._parseName(body, i, length);
|
|
484
|
+
_parseStartTagRange(start, end) {
|
|
485
|
+
let i = this._skipWhitespace(this.buffer, start, end);
|
|
486
|
+
const parsedName = this._parseName(this.buffer, i, end);
|
|
345
487
|
i = parsedName.end;
|
|
346
|
-
const attributes =
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
488
|
+
const attributes = this._rawAttrs;
|
|
489
|
+
attributes.length = 0;
|
|
490
|
+
let selfClosing = false;
|
|
491
|
+
while (i < end) {
|
|
492
|
+
i = this._skipWhitespace(this.buffer, i, end);
|
|
493
|
+
if (i >= end) {
|
|
494
|
+
break;
|
|
495
|
+
}
|
|
496
|
+
if (this.buffer.charCodeAt(i) === 47) {
|
|
497
|
+
i += 1;
|
|
498
|
+
i = this._skipWhitespace(this.buffer, i, end);
|
|
499
|
+
if (i !== end) {
|
|
500
|
+
this._error("Invalid self-closing tag");
|
|
501
|
+
}
|
|
502
|
+
selfClosing = true;
|
|
350
503
|
break;
|
|
351
504
|
}
|
|
352
|
-
const attrName = this._parseName(
|
|
505
|
+
const attrName = this._parseName(this.buffer, i, end);
|
|
353
506
|
i = attrName.end;
|
|
354
|
-
i = this._skipWhitespace(
|
|
355
|
-
if (
|
|
507
|
+
i = this._skipWhitespace(this.buffer, i, end);
|
|
508
|
+
if (this.buffer.charCodeAt(i) !== 61) {
|
|
356
509
|
this._error("Attribute without '='");
|
|
357
510
|
}
|
|
358
511
|
i += 1;
|
|
359
|
-
i = this._skipWhitespace(
|
|
360
|
-
const
|
|
361
|
-
if (
|
|
512
|
+
i = this._skipWhitespace(this.buffer, i, end);
|
|
513
|
+
const quoteCode = this.buffer.charCodeAt(i);
|
|
514
|
+
if (quoteCode !== 34 && quoteCode !== 39) {
|
|
362
515
|
this._error("Attribute value must be quoted");
|
|
363
516
|
}
|
|
517
|
+
const quote = String.fromCharCode(quoteCode);
|
|
364
518
|
i += 1;
|
|
365
|
-
const valueEnd =
|
|
366
|
-
if (valueEnd === -1) {
|
|
519
|
+
const valueEnd = this.buffer.indexOf(quote, i);
|
|
520
|
+
if (valueEnd === -1 || valueEnd >= end) {
|
|
367
521
|
this._error("Unterminated attribute value");
|
|
368
522
|
}
|
|
369
|
-
const rawValue =
|
|
370
|
-
const normalized = rawValue.replace(
|
|
371
|
-
const value = decodeEntities(normalized, this.
|
|
523
|
+
const rawValue = this.buffer.slice(i, valueEnd);
|
|
524
|
+
const normalized = rawValue.includes("\r") ? rawValue.replace(CRLF_RE, "\n") : rawValue;
|
|
525
|
+
const value = !normalized.includes("&") ? normalized : decodeEntities(normalized, this.onError);
|
|
372
526
|
attributes.push({ name: attrName.name, value });
|
|
373
527
|
i = valueEnd + 1;
|
|
374
528
|
}
|
|
375
|
-
return { name: parsedName.name, attributes };
|
|
529
|
+
return { name: parsedName.name, attributes, selfClosing };
|
|
530
|
+
}
|
|
531
|
+
_handleCloseTag(rawName, _nameEnd) {
|
|
532
|
+
this._flushTextBuffer();
|
|
533
|
+
const entry = this.elementStack.pop();
|
|
534
|
+
const ns = this.xmlns ? this.nsStack.pop() : this._currentNs();
|
|
535
|
+
if (!entry || !ns) {
|
|
536
|
+
this._error("Closing tag without matching start tag");
|
|
537
|
+
}
|
|
538
|
+
if (entry.rawName !== rawName) {
|
|
539
|
+
this._error(`Mismatched closing tag: expected </${entry.rawName}>`);
|
|
540
|
+
}
|
|
541
|
+
this.onCloseTag?.(entry.closeTag);
|
|
376
542
|
}
|
|
377
543
|
_emitText(text, allowPendingCR) {
|
|
378
544
|
const normalized = this._normalizeText(text, allowPendingCR);
|
|
379
545
|
if (normalized.length === 0) {
|
|
380
546
|
return;
|
|
381
547
|
}
|
|
382
|
-
|
|
548
|
+
if (!normalized.includes("&")) {
|
|
549
|
+
this._emitDecodedText(normalized);
|
|
550
|
+
return;
|
|
551
|
+
}
|
|
552
|
+
const decoded = decodeEntities(normalized, this.onError);
|
|
383
553
|
if (decoded.length > 0) {
|
|
384
|
-
this.
|
|
554
|
+
this._emitDecodedText(decoded);
|
|
555
|
+
}
|
|
556
|
+
}
|
|
557
|
+
_emitDecodedText(text) {
|
|
558
|
+
if (!this.coalesceText) {
|
|
559
|
+
this.onText?.(text);
|
|
560
|
+
return;
|
|
561
|
+
}
|
|
562
|
+
this.pendingTextParts.push(text);
|
|
563
|
+
}
|
|
564
|
+
_flushTextBuffer() {
|
|
565
|
+
if (!this.coalesceText || this.pendingTextParts.length === 0) {
|
|
566
|
+
return;
|
|
385
567
|
}
|
|
568
|
+
const first = this.pendingTextParts[0];
|
|
569
|
+
const text = this.pendingTextParts.length === 1 && first !== void 0 ? first : this.pendingTextParts.join("");
|
|
570
|
+
this.pendingTextParts.length = 0;
|
|
571
|
+
this.onText?.(text);
|
|
386
572
|
}
|
|
387
573
|
_resolveName(rawName, ns) {
|
|
388
|
-
if (!this.
|
|
574
|
+
if (!this.xmlns) {
|
|
389
575
|
const split2 = rawName.indexOf(":");
|
|
390
576
|
if (split2 === -1) {
|
|
391
577
|
return { name: rawName, prefix: "", local: rawName, uri: "" };
|
|
@@ -420,7 +606,7 @@ var XmlSaxParser = class {
|
|
|
420
606
|
};
|
|
421
607
|
}
|
|
422
608
|
_resolveAttributeName(rawName, ns) {
|
|
423
|
-
if (!this.
|
|
609
|
+
if (!this.xmlns) {
|
|
424
610
|
return this._resolveName(rawName, ns);
|
|
425
611
|
}
|
|
426
612
|
if (rawName === "xmlns") {
|
|
@@ -454,49 +640,59 @@ var XmlSaxParser = class {
|
|
|
454
640
|
};
|
|
455
641
|
}
|
|
456
642
|
_findTagEnd(start) {
|
|
457
|
-
|
|
643
|
+
const quickEnd = this.buffer.indexOf(">", start);
|
|
644
|
+
if (quickEnd === -1) {
|
|
645
|
+
return -1;
|
|
646
|
+
}
|
|
647
|
+
const firstDoubleQuote = this.buffer.indexOf('"', start);
|
|
648
|
+
const firstSingleQuote = this.buffer.indexOf("'", start);
|
|
649
|
+
const firstQuote = firstDoubleQuote === -1 ? firstSingleQuote : firstSingleQuote === -1 ? firstDoubleQuote : Math.min(firstDoubleQuote, firstSingleQuote);
|
|
650
|
+
if (firstQuote === -1 || firstQuote > quickEnd) {
|
|
651
|
+
return quickEnd;
|
|
652
|
+
}
|
|
653
|
+
let quoteCode = 0;
|
|
458
654
|
for (let i = start; i < this.buffer.length; i += 1) {
|
|
459
|
-
const
|
|
460
|
-
if (
|
|
461
|
-
if (
|
|
462
|
-
|
|
655
|
+
const code = this.buffer.charCodeAt(i);
|
|
656
|
+
if (quoteCode) {
|
|
657
|
+
if (code === quoteCode) {
|
|
658
|
+
quoteCode = 0;
|
|
463
659
|
}
|
|
464
660
|
continue;
|
|
465
661
|
}
|
|
466
|
-
if (
|
|
467
|
-
|
|
662
|
+
if (code === 34 || code === 39) {
|
|
663
|
+
quoteCode = code;
|
|
468
664
|
continue;
|
|
469
665
|
}
|
|
470
|
-
if (
|
|
666
|
+
if (code === 62) {
|
|
471
667
|
return i;
|
|
472
668
|
}
|
|
473
669
|
}
|
|
474
670
|
return -1;
|
|
475
671
|
}
|
|
476
672
|
_findDoctypeEnd(start) {
|
|
477
|
-
let
|
|
673
|
+
let quoteCode = 0;
|
|
478
674
|
let bracketDepth = 0;
|
|
479
675
|
for (let i = start; i < this.buffer.length; i += 1) {
|
|
480
|
-
const
|
|
481
|
-
if (
|
|
482
|
-
if (
|
|
483
|
-
|
|
676
|
+
const code = this.buffer.charCodeAt(i);
|
|
677
|
+
if (quoteCode) {
|
|
678
|
+
if (code === quoteCode) {
|
|
679
|
+
quoteCode = 0;
|
|
484
680
|
}
|
|
485
681
|
continue;
|
|
486
682
|
}
|
|
487
|
-
if (
|
|
488
|
-
|
|
683
|
+
if (code === 34 || code === 39) {
|
|
684
|
+
quoteCode = code;
|
|
489
685
|
continue;
|
|
490
686
|
}
|
|
491
|
-
if (
|
|
687
|
+
if (code === 91) {
|
|
492
688
|
bracketDepth += 1;
|
|
493
689
|
continue;
|
|
494
690
|
}
|
|
495
|
-
if (
|
|
691
|
+
if (code === 93) {
|
|
496
692
|
bracketDepth = Math.max(0, bracketDepth - 1);
|
|
497
693
|
continue;
|
|
498
694
|
}
|
|
499
|
-
if (
|
|
695
|
+
if (code === 62 && bracketDepth === 0) {
|
|
500
696
|
return i;
|
|
501
697
|
}
|
|
502
698
|
}
|
|
@@ -506,34 +702,25 @@ var XmlSaxParser = class {
|
|
|
506
702
|
if (start >= end) {
|
|
507
703
|
this._error("Expected name");
|
|
508
704
|
}
|
|
509
|
-
const
|
|
510
|
-
if (
|
|
511
|
-
this._error(
|
|
512
|
-
}
|
|
513
|
-
if (!this._isNameStart(first)) {
|
|
514
|
-
this._error(`Invalid name start: '${first}'`);
|
|
705
|
+
const firstCode = input.charCodeAt(start);
|
|
706
|
+
if (firstCode !== firstCode || firstCode >= 128 || NAME_START_TABLE[firstCode] === 0) {
|
|
707
|
+
this._error(`Invalid name start: '${input[start] ?? ""}'`);
|
|
515
708
|
}
|
|
516
709
|
let i = start + 1;
|
|
517
710
|
while (i < end) {
|
|
518
|
-
const
|
|
519
|
-
if (
|
|
711
|
+
const code = input.charCodeAt(i);
|
|
712
|
+
if (code >= 128 || NAME_CHAR_TABLE[code] === 0) {
|
|
520
713
|
break;
|
|
521
714
|
}
|
|
522
715
|
i += 1;
|
|
523
716
|
}
|
|
524
717
|
return { name: input.slice(start, i), end: i };
|
|
525
718
|
}
|
|
526
|
-
_isNameStart(ch) {
|
|
527
|
-
return /[A-Za-z_]/.test(ch);
|
|
528
|
-
}
|
|
529
|
-
_isNameChar(ch) {
|
|
530
|
-
return /[A-Za-z0-9_:\-.]/.test(ch);
|
|
531
|
-
}
|
|
532
719
|
_skipWhitespace(input, start, end) {
|
|
533
720
|
let i = start;
|
|
534
721
|
while (i < end) {
|
|
535
|
-
const
|
|
536
|
-
if (
|
|
722
|
+
const code = input.charCodeAt(i);
|
|
723
|
+
if (code !== 32 && code !== 9 && code !== 10 && code !== 13) {
|
|
537
724
|
break;
|
|
538
725
|
}
|
|
539
726
|
i += 1;
|
|
@@ -545,12 +732,21 @@ var XmlSaxParser = class {
|
|
|
545
732
|
}
|
|
546
733
|
_advance(text) {
|
|
547
734
|
this.offset += text.length;
|
|
548
|
-
|
|
549
|
-
|
|
735
|
+
if (!this.trackPosition) {
|
|
736
|
+
return;
|
|
737
|
+
}
|
|
738
|
+
let pos = text.indexOf("\n");
|
|
739
|
+
if (pos === -1) {
|
|
550
740
|
this.column += text.length;
|
|
551
741
|
return;
|
|
552
742
|
}
|
|
553
|
-
|
|
743
|
+
let newlineCount = 0;
|
|
744
|
+
let lastNewline = -1;
|
|
745
|
+
while (pos !== -1) {
|
|
746
|
+
newlineCount += 1;
|
|
747
|
+
lastNewline = pos;
|
|
748
|
+
pos = text.indexOf("\n", pos + 1);
|
|
749
|
+
}
|
|
554
750
|
this.line += newlineCount;
|
|
555
751
|
this.column = text.length - lastNewline;
|
|
556
752
|
}
|
|
@@ -558,32 +754,58 @@ var XmlSaxParser = class {
|
|
|
558
754
|
if (!text) {
|
|
559
755
|
return "";
|
|
560
756
|
}
|
|
757
|
+
if (!this.pendingCR && !text.includes("\r")) {
|
|
758
|
+
return text;
|
|
759
|
+
}
|
|
561
760
|
let value = text;
|
|
562
761
|
let prefix = "";
|
|
563
762
|
if (this.pendingCR) {
|
|
564
763
|
prefix = "\n";
|
|
565
|
-
if (value.
|
|
764
|
+
if (value.charCodeAt(0) === 10) {
|
|
566
765
|
value = value.slice(1);
|
|
567
766
|
}
|
|
568
767
|
this.pendingCR = false;
|
|
569
768
|
}
|
|
570
|
-
if (allowPendingCR && value.
|
|
769
|
+
if (allowPendingCR && value.charCodeAt(value.length - 1) === 13) {
|
|
571
770
|
this.pendingCR = true;
|
|
572
771
|
value = value.slice(0, -1);
|
|
573
772
|
}
|
|
574
|
-
const normalized = value.replace(
|
|
575
|
-
return `${prefix}${normalized}
|
|
773
|
+
const normalized = !value.includes("\r") ? value : value.replace(CRLF_RE, "\n");
|
|
774
|
+
return prefix ? `${prefix}${normalized}` : normalized;
|
|
775
|
+
}
|
|
776
|
+
_advanceSpan(start, end) {
|
|
777
|
+
const length = end - start;
|
|
778
|
+
this.offset += length;
|
|
779
|
+
if (!this.trackPosition) {
|
|
780
|
+
return;
|
|
781
|
+
}
|
|
782
|
+
let pos = this.buffer.indexOf("\n", start);
|
|
783
|
+
if (pos === -1 || pos >= end) {
|
|
784
|
+
this.column += length;
|
|
785
|
+
return;
|
|
786
|
+
}
|
|
787
|
+
let newlineCount = 0;
|
|
788
|
+
let lastNewline = -1;
|
|
789
|
+
while (pos !== -1 && pos < end) {
|
|
790
|
+
newlineCount += 1;
|
|
791
|
+
lastNewline = pos;
|
|
792
|
+
pos = this.buffer.indexOf("\n", pos + 1);
|
|
793
|
+
}
|
|
794
|
+
this.line += newlineCount;
|
|
795
|
+
this.column = end - lastNewline;
|
|
576
796
|
}
|
|
577
797
|
_flushPendingCR() {
|
|
578
798
|
if (!this.pendingCR) {
|
|
579
799
|
return;
|
|
580
800
|
}
|
|
581
801
|
this.pendingCR = false;
|
|
582
|
-
this.
|
|
802
|
+
this._emitDecodedText("\n");
|
|
583
803
|
}
|
|
584
804
|
_error(message) {
|
|
585
|
-
const
|
|
586
|
-
this.
|
|
805
|
+
const line = this.trackPosition ? this.line : 0;
|
|
806
|
+
const column = this.trackPosition ? this.column : 0;
|
|
807
|
+
const error = new XmlSaxError(message, this.offset, line, column);
|
|
808
|
+
this.onError?.(error);
|
|
587
809
|
throw error;
|
|
588
810
|
}
|
|
589
811
|
};
|
|
@@ -597,7 +819,7 @@ var TreeBuilder = class {
|
|
|
597
819
|
const node = {
|
|
598
820
|
name: tag.name,
|
|
599
821
|
attributes: Object.fromEntries(
|
|
600
|
-
Object.entries(tag.attributes).map(([key, attr]) => [key, attr.value])
|
|
822
|
+
Object.entries(tag.attributes).map(([key, attr]) => [key, typeof attr === "string" ? attr : attr.value])
|
|
601
823
|
),
|
|
602
824
|
children: []
|
|
603
825
|
};
|
|
@@ -737,11 +959,13 @@ function stripNamespace(name) {
|
|
|
737
959
|
}
|
|
738
960
|
function resolveName(value) {
|
|
739
961
|
if (typeof value !== "string") {
|
|
962
|
+
const prefix = value.prefix ?? "";
|
|
963
|
+
const local = value.local ?? (prefix ? value.name.slice(prefix.length + 1) : value.name);
|
|
740
964
|
return {
|
|
741
965
|
name: value.name,
|
|
742
|
-
localName:
|
|
743
|
-
prefix
|
|
744
|
-
uri: value.uri
|
|
966
|
+
localName: local,
|
|
967
|
+
prefix,
|
|
968
|
+
uri: value.uri ?? ""
|
|
745
969
|
};
|
|
746
970
|
}
|
|
747
971
|
const index = value.indexOf(":");
|
|
@@ -872,7 +1096,7 @@ function normalizeAttributes(attributes, options) {
|
|
|
872
1096
|
const result = /* @__PURE__ */ Object.create(null);
|
|
873
1097
|
for (const [key, attr] of Object.entries(attributes)) {
|
|
874
1098
|
const name = normalizeName(key, options);
|
|
875
|
-
result[name] = attr.value;
|
|
1099
|
+
result[name] = typeof attr === "string" ? attr : attr.value;
|
|
876
1100
|
}
|
|
877
1101
|
return result;
|
|
878
1102
|
}
|