xml-sax-ts 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -23,49 +23,125 @@ var XmlSaxError = class extends Error {
23
23
  };
24
24
 
25
25
  // src/entities.ts
26
- var NAMED_ENTITIES = {
27
- lt: "<",
28
- gt: ">",
29
- amp: "&",
30
- quot: '"',
31
- apos: "'"
32
- };
33
26
  function decodeEntities(input, onError) {
34
- let output = "";
27
+ const firstAmp = input.indexOf("&");
28
+ if (firstAmp === -1) {
29
+ return input;
30
+ }
31
+ let result = "";
35
32
  let i = 0;
36
33
  while (i < input.length) {
37
- const ch = input[i];
38
- if (ch !== "&") {
39
- output += ch;
40
- i += 1;
41
- continue;
34
+ const amp = input.indexOf("&", i);
35
+ if (amp === -1) {
36
+ if (i === 0) {
37
+ return input;
38
+ }
39
+ return i < input.length ? result + input.slice(i) : result;
42
40
  }
43
- const semi = input.indexOf(";", i + 1);
41
+ if (amp > i) {
42
+ result += input.slice(i, amp);
43
+ }
44
+ const semi = input.indexOf(";", amp + 1);
44
45
  if (semi === -1) {
45
- const err = new XmlSaxError("Unterminated entity", i, 0, 0);
46
+ const err = new XmlSaxError("Unterminated entity", amp, 0, 0);
46
47
  onError?.(err);
47
48
  throw err;
48
49
  }
49
- const entity = input.slice(i + 1, semi);
50
50
  let decoded;
51
- if (entity.startsWith("#x") || entity.startsWith("#X")) {
52
- const codePoint = Number.parseInt(entity.slice(2), 16);
53
- decoded = decodeCodePoint(codePoint);
54
- } else if (entity.startsWith("#")) {
55
- const codePoint = Number.parseInt(entity.slice(1), 10);
56
- decoded = decodeCodePoint(codePoint);
51
+ const marker = input[amp + 1];
52
+ if (marker === "#") {
53
+ const numeric = parseNumericEntity(input, amp + 2, semi);
54
+ decoded = numeric === void 0 ? void 0 : decodeCodePoint(numeric);
57
55
  } else {
58
- decoded = NAMED_ENTITIES[entity];
56
+ decoded = decodeNamedEntity(input, amp + 1, semi);
59
57
  }
60
58
  if (decoded === void 0) {
61
- const err = new XmlSaxError(`Unknown entity: &${entity};`, i, 0, 0);
59
+ const entity = input.slice(amp + 1, semi);
60
+ const err = new XmlSaxError(`Unknown entity: &${entity};`, amp, 0, 0);
62
61
  onError?.(err);
63
62
  throw err;
64
63
  }
65
- output += decoded;
64
+ result += decoded;
66
65
  i = semi + 1;
67
66
  }
68
- return output;
67
+ return result;
68
+ }
69
+ function decodeNamedEntity(input, start, end) {
70
+ const len = end - start;
71
+ if (len === 2) {
72
+ if (input[start] === "l" && input[start + 1] === "t") {
73
+ return "<";
74
+ }
75
+ if (input[start] === "g" && input[start + 1] === "t") {
76
+ return ">";
77
+ }
78
+ return void 0;
79
+ }
80
+ if (len === 3) {
81
+ if (input[start] === "a" && input[start + 1] === "m" && input[start + 2] === "p") {
82
+ return "&";
83
+ }
84
+ return void 0;
85
+ }
86
+ if (len === 4) {
87
+ const maybeQuot = input[start] === "q" && input[start + 1] === "u" && input[start + 2] === "o" && input[start + 3] === "t";
88
+ if (maybeQuot) {
89
+ return '"';
90
+ }
91
+ const maybeApos = input[start] === "a" && input[start + 1] === "p" && input[start + 2] === "o" && input[start + 3] === "s";
92
+ if (maybeApos) {
93
+ return "'";
94
+ }
95
+ }
96
+ return void 0;
97
+ }
98
+ function parseNumericEntity(input, start, end) {
99
+ if (start >= end) {
100
+ return void 0;
101
+ }
102
+ let i = start;
103
+ let radix = 10;
104
+ const marker = input[i];
105
+ if (marker === "x" || marker === "X") {
106
+ radix = 16;
107
+ i += 1;
108
+ }
109
+ if (i >= end) {
110
+ return void 0;
111
+ }
112
+ let value = 0;
113
+ for (; i < end; i += 1) {
114
+ const ch = input[i];
115
+ if (ch === void 0) {
116
+ return void 0;
117
+ }
118
+ const digit = radix === 16 ? hexDigit(ch) : decimalDigit(ch);
119
+ if (digit === -1) {
120
+ return void 0;
121
+ }
122
+ value = value * radix + digit;
123
+ }
124
+ return value;
125
+ }
126
+ function decimalDigit(ch) {
127
+ const code = ch.charCodeAt(0) - 48;
128
+ if (code < 0 || code > 9) {
129
+ return -1;
130
+ }
131
+ return code;
132
+ }
133
+ function hexDigit(ch) {
134
+ const code = ch.charCodeAt(0);
135
+ if (code >= 48 && code <= 57) {
136
+ return code - 48;
137
+ }
138
+ if (code >= 65 && code <= 70) {
139
+ return code - 55;
140
+ }
141
+ if (code >= 97 && code <= 102) {
142
+ return code - 87;
143
+ }
144
+ return -1;
69
145
  }
70
146
  function decodeCodePoint(codePoint) {
71
147
  if (!Number.isFinite(codePoint)) {
@@ -84,8 +160,7 @@ function splitTextForEntities(text) {
84
160
  if (lastAmp === -1) {
85
161
  return { emit: text, carry: "" };
86
162
  }
87
- const nextSemi = text.indexOf(";", lastAmp + 1);
88
- if (nextSemi === -1) {
163
+ if (!text.includes(";", lastAmp + 1)) {
89
164
  return {
90
165
  emit: text.slice(0, lastAmp),
91
166
  carry: text.slice(lastAmp)
@@ -98,10 +173,32 @@ function splitTextForEntities(text) {
98
173
  var DEFAULT_OPTIONS = {
99
174
  xmlns: true,
100
175
  includeNamespaceAttributes: false,
101
- allowDoctype: true
176
+ allowDoctype: true,
177
+ coalesceText: false,
178
+ trackPosition: true
102
179
  };
103
180
  var XML_NAMESPACE_URI = "http://www.w3.org/XML/1998/namespace";
104
181
  var XMLNS_NAMESPACE_URI = "http://www.w3.org/2000/xmlns/";
182
+ var WHITESPACE_RE = /\s/;
183
+ var CRLF_RE = /\r\n?/g;
184
+ var NAME_START_TABLE = new Uint8Array(128);
185
+ var NAME_CHAR_TABLE = new Uint8Array(128);
186
+ for (let code = 65; code <= 90; code += 1) {
187
+ NAME_START_TABLE[code] = 1;
188
+ NAME_CHAR_TABLE[code] = 1;
189
+ }
190
+ for (let code = 97; code <= 122; code += 1) {
191
+ NAME_START_TABLE[code] = 1;
192
+ NAME_CHAR_TABLE[code] = 1;
193
+ }
194
+ for (let code = 48; code <= 57; code += 1) {
195
+ NAME_CHAR_TABLE[code] = 1;
196
+ }
197
+ NAME_START_TABLE[95] = 1;
198
+ NAME_CHAR_TABLE[95] = 1;
199
+ NAME_CHAR_TABLE[58] = 1;
200
+ NAME_CHAR_TABLE[45] = 1;
201
+ NAME_CHAR_TABLE[46] = 1;
105
202
  var XmlSaxParser = class {
106
203
  constructor(options = {}) {
107
204
  this.buffer = "";
@@ -117,7 +214,22 @@ var XmlSaxParser = class {
117
214
  ];
118
215
  this.closed = false;
119
216
  this.pendingCR = false;
120
- this.options = { ...DEFAULT_OPTIONS, ...options };
217
+ this.pendingTextParts = [];
218
+ this._rawAttrs = [];
219
+ const resolved = { ...DEFAULT_OPTIONS, ...options };
220
+ this.xmlns = resolved.xmlns;
221
+ this.includeNamespaceAttributes = resolved.includeNamespaceAttributes;
222
+ this.allowDoctype = resolved.allowDoctype;
223
+ this.coalesceText = resolved.coalesceText;
224
+ this.trackPosition = resolved.trackPosition;
225
+ this.onOpenTag = resolved.onOpenTag;
226
+ this.onCloseTag = resolved.onCloseTag;
227
+ this.onText = resolved.onText;
228
+ this.onCdata = resolved.onCdata;
229
+ this.onComment = resolved.onComment;
230
+ this.onProcessingInstruction = resolved.onProcessingInstruction;
231
+ this.onDoctype = resolved.onDoctype;
232
+ this.onError = resolved.onError;
121
233
  }
122
234
  feed(chunk) {
123
235
  if (this.closed) {
@@ -135,6 +247,7 @@ var XmlSaxParser = class {
135
247
  }
136
248
  this._parseBuffer(true);
137
249
  this._flushPendingCR();
250
+ this._flushTextBuffer();
138
251
  if (this.buffer.length > 0) {
139
252
  this._error("Unexpected end of input");
140
253
  }
@@ -149,12 +262,21 @@ var XmlSaxParser = class {
149
262
  const lt = this.buffer.indexOf("<", i);
150
263
  if (lt === -1) {
151
264
  const tail = this.buffer.slice(i);
152
- const split = splitTextForEntities(tail);
153
- if (split.emit.length > 0) {
154
- this._emitText(split.emit, true);
155
- this._advance(split.emit);
265
+ if (!tail.includes("&")) {
266
+ if (tail.length > 0) {
267
+ this._emitText(tail, true);
268
+ this._advance(tail);
269
+ }
270
+ } else {
271
+ const split = splitTextForEntities(tail);
272
+ if (split.emit.length > 0) {
273
+ this._emitText(split.emit, true);
274
+ this._advance(split.emit);
275
+ }
276
+ this.buffer = split.carry;
277
+ return;
156
278
  }
157
- this.buffer = split.carry;
279
+ this.buffer = "";
158
280
  return;
159
281
  }
160
282
  if (lt > i) {
@@ -169,8 +291,7 @@ var XmlSaxParser = class {
169
291
  if (consumed === null) {
170
292
  break;
171
293
  }
172
- const markup = this.buffer.slice(lt, lt + consumed);
173
- this._advance(markup);
294
+ this._advanceSpan(lt, lt + consumed);
174
295
  i = lt + consumed;
175
296
  }
176
297
  this.buffer = this.buffer.slice(i);
@@ -180,35 +301,9 @@ var XmlSaxParser = class {
180
301
  }
181
302
  _parseMarkupFrom(start, final) {
182
303
  assert(this.buffer[start] === "<", "Markup must start with '<'");
183
- this._flushPendingCR();
184
- if (this.buffer.startsWith("<!--", start)) {
185
- const end = this.buffer.indexOf("-->", start + 4);
186
- if (end === -1) {
187
- if (final) {
188
- this._error("Unterminated comment");
189
- }
190
- return null;
191
- }
192
- const comment = this.buffer.slice(start + 4, end);
193
- this.options.onComment?.(comment);
194
- return end + 3 - start;
195
- }
196
- if (this.buffer.startsWith("<![CDATA[", start)) {
197
- const end = this.buffer.indexOf("]]>", start + 9);
198
- if (end === -1) {
199
- if (final) {
200
- this._error("Unterminated CDATA section");
201
- }
202
- return null;
203
- }
204
- const cdata = this.buffer.slice(start + 9, end);
205
- const normalized = this._normalizeText(cdata, false);
206
- if (normalized.length > 0) {
207
- this.options.onCdata?.(normalized);
208
- }
209
- return end + 3 - start;
210
- }
211
- if (this.buffer.startsWith("<?", start)) {
304
+ if (this.pendingCR) this._flushPendingCR();
305
+ const secondCode = this.buffer.charCodeAt(start + 1);
306
+ if (secondCode === 63) {
212
307
  const end = this.buffer.indexOf("?>", start + 2);
213
308
  if (end === -1) {
214
309
  if (final) {
@@ -217,30 +312,64 @@ var XmlSaxParser = class {
217
312
  return null;
218
313
  }
219
314
  const body = this.buffer.slice(start + 2, end).trim();
220
- const split = body.search(/\s/);
315
+ const split = body.search(WHITESPACE_RE);
221
316
  const target = split === -1 ? body : body.slice(0, split);
222
317
  const data = split === -1 ? "" : body.slice(split).trim();
223
318
  const pi = { target, body: data };
224
- this.options.onProcessingInstruction?.(pi);
319
+ this._flushTextBuffer();
320
+ this.onProcessingInstruction?.(pi);
225
321
  return end + 2 - start;
226
322
  }
227
- if (this.buffer.startsWith("<!DOCTYPE", start)) {
228
- const end = this._findDoctypeEnd(start + 9);
229
- if (end === -1) {
230
- if (final) {
231
- this._error("Unterminated doctype declaration");
323
+ if (secondCode === 33) {
324
+ const thirdCode = this.buffer.charCodeAt(start + 2);
325
+ if (thirdCode === 45 && this.buffer.charCodeAt(start + 3) === 45) {
326
+ const end = this.buffer.indexOf("-->", start + 4);
327
+ if (end === -1) {
328
+ if (final) {
329
+ this._error("Unterminated comment");
330
+ }
331
+ return null;
232
332
  }
233
- return null;
333
+ const comment = this.buffer.slice(start + 4, end);
334
+ this._flushTextBuffer();
335
+ this.onComment?.(comment);
336
+ return end + 3 - start;
234
337
  }
235
- if (!this.options.allowDoctype) {
236
- this._error("Doctype is not allowed");
338
+ if (thirdCode === 91 && this.buffer.startsWith("<![CDATA[", start)) {
339
+ const end = this.buffer.indexOf("]]>", start + 9);
340
+ if (end === -1) {
341
+ if (final) {
342
+ this._error("Unterminated CDATA section");
343
+ }
344
+ return null;
345
+ }
346
+ const cdata = this.buffer.slice(start + 9, end);
347
+ const normalized = this._normalizeText(cdata, false);
348
+ if (normalized.length > 0) {
349
+ this._flushTextBuffer();
350
+ this.onCdata?.(normalized);
351
+ }
352
+ return end + 3 - start;
353
+ }
354
+ if (thirdCode === 68 && this.buffer.startsWith("<!DOCTYPE", start)) {
355
+ const end = this._findDoctypeEnd(start + 9);
356
+ if (end === -1) {
357
+ if (final) {
358
+ this._error("Unterminated doctype declaration");
359
+ }
360
+ return null;
361
+ }
362
+ if (!this.allowDoctype) {
363
+ this._error("Doctype is not allowed");
364
+ }
365
+ const raw = this.buffer.slice(start + 9, end).trim();
366
+ const doctype = { raw };
367
+ this._flushTextBuffer();
368
+ this.onDoctype?.(doctype);
369
+ return end + 1 - start;
237
370
  }
238
- const raw = this.buffer.slice(start + 9, end).trim();
239
- const doctype = { raw };
240
- this.options.onDoctype?.(doctype);
241
- return end + 1 - start;
242
371
  }
243
- if (this.buffer.startsWith("</", start)) {
372
+ if (secondCode === 47) {
244
373
  const end = this.buffer.indexOf(">", start + 2);
245
374
  if (end === -1) {
246
375
  if (final) {
@@ -248,12 +377,13 @@ var XmlSaxParser = class {
248
377
  }
249
378
  return null;
250
379
  }
251
- const raw = this.buffer.slice(start + 2, end).trim();
252
- const parsed = this._parseName(raw, 0, raw.length);
253
- if (raw.slice(parsed.end).trim().length > 0) {
380
+ let i = this._skipWhitespace(this.buffer, start + 2, end);
381
+ const parsed = this._parseName(this.buffer, i, end);
382
+ i = this._skipWhitespace(this.buffer, parsed.end, end);
383
+ if (i !== end) {
254
384
  this._error("Invalid closing tag");
255
385
  }
256
- this._handleCloseTag(parsed.name);
386
+ this._handleCloseTag(parsed.name, parsed.end);
257
387
  return end + 1 - start;
258
388
  }
259
389
  const tagEnd = this._findTagEnd(start + 1);
@@ -263,30 +393,54 @@ var XmlSaxParser = class {
263
393
  }
264
394
  return null;
265
395
  }
266
- const content = this.buffer.slice(start + 1, tagEnd);
267
- this._handleStartTag(content);
396
+ this._handleStartTagRange(start + 1, tagEnd);
268
397
  return tagEnd + 1 - start;
269
398
  }
270
- _handleStartTag(content) {
271
- const trimmed = content.trim();
272
- const selfClosing = trimmed.endsWith("/");
273
- const body = selfClosing ? trimmed.slice(0, -1).trim() : trimmed;
274
- const parsed = this._parseTagBody(body);
275
- let ns = this._currentNs();
276
- if (this.options.xmlns) {
277
- ns = Object.create(ns);
399
+ _handleStartTagRange(start, end) {
400
+ this._flushTextBuffer();
401
+ const parsed = this._parseStartTagRange(start, end);
402
+ const selfClosing = parsed.selfClosing;
403
+ if (!this.xmlns) {
404
+ const plainName = parsed.name;
405
+ const attributes2 = /* @__PURE__ */ Object.create(null);
278
406
  for (const attr of parsed.attributes) {
279
- if (attr.name === "xmlns") {
280
- ns[""] = attr.value;
281
- } else if (attr.name.startsWith("xmlns:")) {
282
- ns[attr.name.slice(6)] = attr.value;
407
+ attributes2[attr.name] = attr.value;
408
+ }
409
+ const tag2 = {
410
+ name: plainName,
411
+ attributes: attributes2,
412
+ isSelfClosing: selfClosing
413
+ };
414
+ this.onOpenTag?.(tag2);
415
+ if (selfClosing) {
416
+ this.onCloseTag?.({ name: plainName });
417
+ return;
418
+ }
419
+ this.elementStack.push({
420
+ rawName: parsed.name,
421
+ closeTag: { name: plainName }
422
+ });
423
+ return;
424
+ }
425
+ const parentNs = this._currentNs();
426
+ let ns = parentNs;
427
+ for (const attr of parsed.attributes) {
428
+ if (attr.name === "xmlns") {
429
+ if (ns === parentNs) {
430
+ ns = Object.create(parentNs);
283
431
  }
432
+ ns[""] = attr.value;
433
+ } else if (attr.name.startsWith("xmlns:")) {
434
+ if (ns === parentNs) {
435
+ ns = Object.create(parentNs);
436
+ }
437
+ ns[attr.name.slice(6)] = attr.value;
284
438
  }
285
439
  }
286
440
  const resolvedName = this._resolveName(parsed.name, ns);
287
- const attributes = {};
441
+ const attributes = /* @__PURE__ */ Object.create(null);
288
442
  for (const attr of parsed.attributes) {
289
- if (this.options.xmlns && !this.options.includeNamespaceAttributes) {
443
+ if (!this.includeNamespaceAttributes) {
290
444
  if (attr.name === "xmlns" || attr.name.startsWith("xmlns:")) {
291
445
  continue;
292
446
  }
@@ -308,86 +462,118 @@ var XmlSaxParser = class {
308
462
  attributes,
309
463
  isSelfClosing: selfClosing
310
464
  };
311
- this.options.onOpenTag?.(tag);
465
+ this.onOpenTag?.(tag);
312
466
  if (selfClosing) {
313
- const closeTag = {
467
+ this.onCloseTag?.({
314
468
  name: resolvedName.name,
315
469
  prefix: resolvedName.prefix,
316
470
  local: resolvedName.local,
317
471
  uri: resolvedName.uri
318
- };
319
- this.options.onCloseTag?.(closeTag);
472
+ });
320
473
  return;
321
474
  }
322
- this.elementStack.push({ rawName: parsed.name, resolved: resolvedName, ns });
475
+ this.elementStack.push({
476
+ rawName: parsed.name,
477
+ closeTag: {
478
+ name: resolvedName.name,
479
+ prefix: resolvedName.prefix,
480
+ local: resolvedName.local,
481
+ uri: resolvedName.uri
482
+ }
483
+ });
323
484
  this.nsStack.push(ns);
324
485
  }
325
- _handleCloseTag(rawName) {
326
- const entry = this.elementStack.pop();
327
- const ns = this.nsStack.pop();
328
- if (!entry || !ns) {
329
- this._error("Closing tag without matching start tag");
330
- }
331
- if (entry.rawName !== rawName) {
332
- this._error(`Mismatched closing tag: expected </${entry.rawName}>`);
333
- }
334
- const closeTag = {
335
- name: entry.resolved.name,
336
- prefix: entry.resolved.prefix,
337
- local: entry.resolved.local,
338
- uri: entry.resolved.uri
339
- };
340
- this.options.onCloseTag?.(closeTag);
341
- }
342
- _parseTagBody(body) {
343
- let i = 0;
344
- const length = body.length;
345
- i = this._skipWhitespace(body, i, length);
346
- const parsedName = this._parseName(body, i, length);
486
+ _parseStartTagRange(start, end) {
487
+ let i = this._skipWhitespace(this.buffer, start, end);
488
+ const parsedName = this._parseName(this.buffer, i, end);
347
489
  i = parsedName.end;
348
- const attributes = [];
349
- while (i < length) {
350
- i = this._skipWhitespace(body, i, length);
351
- if (i >= length) {
490
+ const attributes = this._rawAttrs;
491
+ attributes.length = 0;
492
+ let selfClosing = false;
493
+ while (i < end) {
494
+ i = this._skipWhitespace(this.buffer, i, end);
495
+ if (i >= end) {
496
+ break;
497
+ }
498
+ if (this.buffer.charCodeAt(i) === 47) {
499
+ i += 1;
500
+ i = this._skipWhitespace(this.buffer, i, end);
501
+ if (i !== end) {
502
+ this._error("Invalid self-closing tag");
503
+ }
504
+ selfClosing = true;
352
505
  break;
353
506
  }
354
- const attrName = this._parseName(body, i, length);
507
+ const attrName = this._parseName(this.buffer, i, end);
355
508
  i = attrName.end;
356
- i = this._skipWhitespace(body, i, length);
357
- if (body[i] !== "=") {
509
+ i = this._skipWhitespace(this.buffer, i, end);
510
+ if (this.buffer.charCodeAt(i) !== 61) {
358
511
  this._error("Attribute without '='");
359
512
  }
360
513
  i += 1;
361
- i = this._skipWhitespace(body, i, length);
362
- const quote = body[i];
363
- if (quote !== '"' && quote !== "'") {
514
+ i = this._skipWhitespace(this.buffer, i, end);
515
+ const quoteCode = this.buffer.charCodeAt(i);
516
+ if (quoteCode !== 34 && quoteCode !== 39) {
364
517
  this._error("Attribute value must be quoted");
365
518
  }
519
+ const quote = String.fromCharCode(quoteCode);
366
520
  i += 1;
367
- const valueEnd = body.indexOf(quote, i);
368
- if (valueEnd === -1) {
521
+ const valueEnd = this.buffer.indexOf(quote, i);
522
+ if (valueEnd === -1 || valueEnd >= end) {
369
523
  this._error("Unterminated attribute value");
370
524
  }
371
- const rawValue = body.slice(i, valueEnd);
372
- const normalized = rawValue.replace(/\r\n?/g, "\n");
373
- const value = decodeEntities(normalized, this.options.onError);
525
+ const rawValue = this.buffer.slice(i, valueEnd);
526
+ const normalized = rawValue.includes("\r") ? rawValue.replace(CRLF_RE, "\n") : rawValue;
527
+ const value = !normalized.includes("&") ? normalized : decodeEntities(normalized, this.onError);
374
528
  attributes.push({ name: attrName.name, value });
375
529
  i = valueEnd + 1;
376
530
  }
377
- return { name: parsedName.name, attributes };
531
+ return { name: parsedName.name, attributes, selfClosing };
532
+ }
533
+ _handleCloseTag(rawName, _nameEnd) {
534
+ this._flushTextBuffer();
535
+ const entry = this.elementStack.pop();
536
+ const ns = this.xmlns ? this.nsStack.pop() : this._currentNs();
537
+ if (!entry || !ns) {
538
+ this._error("Closing tag without matching start tag");
539
+ }
540
+ if (entry.rawName !== rawName) {
541
+ this._error(`Mismatched closing tag: expected </${entry.rawName}>`);
542
+ }
543
+ this.onCloseTag?.(entry.closeTag);
378
544
  }
379
545
  _emitText(text, allowPendingCR) {
380
546
  const normalized = this._normalizeText(text, allowPendingCR);
381
547
  if (normalized.length === 0) {
382
548
  return;
383
549
  }
384
- const decoded = decodeEntities(normalized, this.options.onError);
550
+ if (!normalized.includes("&")) {
551
+ this._emitDecodedText(normalized);
552
+ return;
553
+ }
554
+ const decoded = decodeEntities(normalized, this.onError);
385
555
  if (decoded.length > 0) {
386
- this.options.onText?.(decoded);
556
+ this._emitDecodedText(decoded);
557
+ }
558
+ }
559
+ _emitDecodedText(text) {
560
+ if (!this.coalesceText) {
561
+ this.onText?.(text);
562
+ return;
563
+ }
564
+ this.pendingTextParts.push(text);
565
+ }
566
+ _flushTextBuffer() {
567
+ if (!this.coalesceText || this.pendingTextParts.length === 0) {
568
+ return;
387
569
  }
570
+ const first = this.pendingTextParts[0];
571
+ const text = this.pendingTextParts.length === 1 && first !== void 0 ? first : this.pendingTextParts.join("");
572
+ this.pendingTextParts.length = 0;
573
+ this.onText?.(text);
388
574
  }
389
575
  _resolveName(rawName, ns) {
390
- if (!this.options.xmlns) {
576
+ if (!this.xmlns) {
391
577
  const split2 = rawName.indexOf(":");
392
578
  if (split2 === -1) {
393
579
  return { name: rawName, prefix: "", local: rawName, uri: "" };
@@ -422,7 +608,7 @@ var XmlSaxParser = class {
422
608
  };
423
609
  }
424
610
  _resolveAttributeName(rawName, ns) {
425
- if (!this.options.xmlns) {
611
+ if (!this.xmlns) {
426
612
  return this._resolveName(rawName, ns);
427
613
  }
428
614
  if (rawName === "xmlns") {
@@ -456,49 +642,59 @@ var XmlSaxParser = class {
456
642
  };
457
643
  }
458
644
  _findTagEnd(start) {
459
- let quote = null;
645
+ const quickEnd = this.buffer.indexOf(">", start);
646
+ if (quickEnd === -1) {
647
+ return -1;
648
+ }
649
+ const firstDoubleQuote = this.buffer.indexOf('"', start);
650
+ const firstSingleQuote = this.buffer.indexOf("'", start);
651
+ const firstQuote = firstDoubleQuote === -1 ? firstSingleQuote : firstSingleQuote === -1 ? firstDoubleQuote : Math.min(firstDoubleQuote, firstSingleQuote);
652
+ if (firstQuote === -1 || firstQuote > quickEnd) {
653
+ return quickEnd;
654
+ }
655
+ let quoteCode = 0;
460
656
  for (let i = start; i < this.buffer.length; i += 1) {
461
- const ch = this.buffer[i];
462
- if (quote) {
463
- if (ch === quote) {
464
- quote = null;
657
+ const code = this.buffer.charCodeAt(i);
658
+ if (quoteCode) {
659
+ if (code === quoteCode) {
660
+ quoteCode = 0;
465
661
  }
466
662
  continue;
467
663
  }
468
- if (ch === '"' || ch === "'") {
469
- quote = ch;
664
+ if (code === 34 || code === 39) {
665
+ quoteCode = code;
470
666
  continue;
471
667
  }
472
- if (ch === ">") {
668
+ if (code === 62) {
473
669
  return i;
474
670
  }
475
671
  }
476
672
  return -1;
477
673
  }
478
674
  _findDoctypeEnd(start) {
479
- let quote = null;
675
+ let quoteCode = 0;
480
676
  let bracketDepth = 0;
481
677
  for (let i = start; i < this.buffer.length; i += 1) {
482
- const ch = this.buffer[i];
483
- if (quote) {
484
- if (ch === quote) {
485
- quote = null;
678
+ const code = this.buffer.charCodeAt(i);
679
+ if (quoteCode) {
680
+ if (code === quoteCode) {
681
+ quoteCode = 0;
486
682
  }
487
683
  continue;
488
684
  }
489
- if (ch === '"' || ch === "'") {
490
- quote = ch;
685
+ if (code === 34 || code === 39) {
686
+ quoteCode = code;
491
687
  continue;
492
688
  }
493
- if (ch === "[") {
689
+ if (code === 91) {
494
690
  bracketDepth += 1;
495
691
  continue;
496
692
  }
497
- if (ch === "]") {
693
+ if (code === 93) {
498
694
  bracketDepth = Math.max(0, bracketDepth - 1);
499
695
  continue;
500
696
  }
501
- if (ch === ">" && bracketDepth === 0) {
697
+ if (code === 62 && bracketDepth === 0) {
502
698
  return i;
503
699
  }
504
700
  }
@@ -508,34 +704,25 @@ var XmlSaxParser = class {
508
704
  if (start >= end) {
509
705
  this._error("Expected name");
510
706
  }
511
- const first = input[start];
512
- if (first === void 0) {
513
- this._error("Expected name");
514
- }
515
- if (!this._isNameStart(first)) {
516
- this._error(`Invalid name start: '${first}'`);
707
+ const firstCode = input.charCodeAt(start);
708
+ if (firstCode !== firstCode || firstCode >= 128 || NAME_START_TABLE[firstCode] === 0) {
709
+ this._error(`Invalid name start: '${input[start] ?? ""}'`);
517
710
  }
518
711
  let i = start + 1;
519
712
  while (i < end) {
520
- const ch = input[i];
521
- if (ch === void 0 || !this._isNameChar(ch)) {
713
+ const code = input.charCodeAt(i);
714
+ if (code >= 128 || NAME_CHAR_TABLE[code] === 0) {
522
715
  break;
523
716
  }
524
717
  i += 1;
525
718
  }
526
719
  return { name: input.slice(start, i), end: i };
527
720
  }
528
- _isNameStart(ch) {
529
- return /[A-Za-z_]/.test(ch);
530
- }
531
- _isNameChar(ch) {
532
- return /[A-Za-z0-9_:\-.]/.test(ch);
533
- }
534
721
  _skipWhitespace(input, start, end) {
535
722
  let i = start;
536
723
  while (i < end) {
537
- const ch = input[i];
538
- if (ch === void 0 || !/\s/.test(ch)) {
724
+ const code = input.charCodeAt(i);
725
+ if (code !== 32 && code !== 9 && code !== 10 && code !== 13) {
539
726
  break;
540
727
  }
541
728
  i += 1;
@@ -547,12 +734,21 @@ var XmlSaxParser = class {
547
734
  }
548
735
  _advance(text) {
549
736
  this.offset += text.length;
550
- const lastNewline = text.lastIndexOf("\n");
551
- if (lastNewline === -1) {
737
+ if (!this.trackPosition) {
738
+ return;
739
+ }
740
+ let pos = text.indexOf("\n");
741
+ if (pos === -1) {
552
742
  this.column += text.length;
553
743
  return;
554
744
  }
555
- const newlineCount = text.split("\n").length - 1;
745
+ let newlineCount = 0;
746
+ let lastNewline = -1;
747
+ while (pos !== -1) {
748
+ newlineCount += 1;
749
+ lastNewline = pos;
750
+ pos = text.indexOf("\n", pos + 1);
751
+ }
556
752
  this.line += newlineCount;
557
753
  this.column = text.length - lastNewline;
558
754
  }
@@ -560,32 +756,58 @@ var XmlSaxParser = class {
560
756
  if (!text) {
561
757
  return "";
562
758
  }
759
+ if (!this.pendingCR && !text.includes("\r")) {
760
+ return text;
761
+ }
563
762
  let value = text;
564
763
  let prefix = "";
565
764
  if (this.pendingCR) {
566
765
  prefix = "\n";
567
- if (value.startsWith("\n")) {
766
+ if (value.charCodeAt(0) === 10) {
568
767
  value = value.slice(1);
569
768
  }
570
769
  this.pendingCR = false;
571
770
  }
572
- if (allowPendingCR && value.endsWith("\r")) {
771
+ if (allowPendingCR && value.charCodeAt(value.length - 1) === 13) {
573
772
  this.pendingCR = true;
574
773
  value = value.slice(0, -1);
575
774
  }
576
- const normalized = value.replace(/\r\n?/g, "\n");
577
- return `${prefix}${normalized}`;
775
+ const normalized = !value.includes("\r") ? value : value.replace(CRLF_RE, "\n");
776
+ return prefix ? `${prefix}${normalized}` : normalized;
777
+ }
778
+ _advanceSpan(start, end) {
779
+ const length = end - start;
780
+ this.offset += length;
781
+ if (!this.trackPosition) {
782
+ return;
783
+ }
784
+ let pos = this.buffer.indexOf("\n", start);
785
+ if (pos === -1 || pos >= end) {
786
+ this.column += length;
787
+ return;
788
+ }
789
+ let newlineCount = 0;
790
+ let lastNewline = -1;
791
+ while (pos !== -1 && pos < end) {
792
+ newlineCount += 1;
793
+ lastNewline = pos;
794
+ pos = this.buffer.indexOf("\n", pos + 1);
795
+ }
796
+ this.line += newlineCount;
797
+ this.column = end - lastNewline;
578
798
  }
579
799
  _flushPendingCR() {
580
800
  if (!this.pendingCR) {
581
801
  return;
582
802
  }
583
803
  this.pendingCR = false;
584
- this.options.onText?.("\n");
804
+ this._emitDecodedText("\n");
585
805
  }
586
806
  _error(message) {
587
- const error = new XmlSaxError(message, this.offset, this.line, this.column);
588
- this.options.onError?.(error);
807
+ const line = this.trackPosition ? this.line : 0;
808
+ const column = this.trackPosition ? this.column : 0;
809
+ const error = new XmlSaxError(message, this.offset, line, column);
810
+ this.onError?.(error);
589
811
  throw error;
590
812
  }
591
813
  };
@@ -599,7 +821,7 @@ var TreeBuilder = class {
599
821
  const node = {
600
822
  name: tag.name,
601
823
  attributes: Object.fromEntries(
602
- Object.entries(tag.attributes).map(([key, attr]) => [key, attr.value])
824
+ Object.entries(tag.attributes).map(([key, attr]) => [key, typeof attr === "string" ? attr : attr.value])
603
825
  ),
604
826
  children: []
605
827
  };
@@ -739,11 +961,13 @@ function stripNamespace(name) {
739
961
  }
740
962
  function resolveName(value) {
741
963
  if (typeof value !== "string") {
964
+ const prefix = value.prefix ?? "";
965
+ const local = value.local ?? (prefix ? value.name.slice(prefix.length + 1) : value.name);
742
966
  return {
743
967
  name: value.name,
744
- localName: value.local,
745
- prefix: value.prefix,
746
- uri: value.uri
968
+ localName: local,
969
+ prefix,
970
+ uri: value.uri ?? ""
747
971
  };
748
972
  }
749
973
  const index = value.indexOf(":");
@@ -761,6 +985,16 @@ function buildObject(root, options = {}) {
761
985
  const settings = buildSettings(options);
762
986
  return buildNode(root, settings, []);
763
987
  }
988
+ function buildXmlNode(obj, options = {}) {
989
+ const settings = buildXmlSettings(options);
990
+ const root = resolveRoot(obj, settings);
991
+ const rootName = normalizeName(root.name, settings);
992
+ return buildElement(rootName, root.value, settings, []);
993
+ }
994
+ function objectToXml(obj, options = {}) {
995
+ const node = buildXmlNode(obj, options);
996
+ return serializeXml(node, options);
997
+ }
764
998
  var ObjectBuilder = class {
765
999
  constructor(options = {}) {
766
1000
  this.stack = [];
@@ -823,6 +1057,9 @@ var ObjectBuilder = class {
823
1057
  function buildSettings(options) {
824
1058
  return { ...DEFAULT_OBJECT_OPTIONS, ...options };
825
1059
  }
1060
+ function buildXmlSettings(options) {
1061
+ return { ...DEFAULT_OBJECT_OPTIONS, ...options };
1062
+ }
826
1063
  function buildNode(node, options, path) {
827
1064
  const name = normalizeName(node.name, options);
828
1065
  const attributes = normalizeAttributeMap(node.attributes ?? {}, options);
@@ -851,11 +1088,17 @@ function normalizeName(name, options) {
851
1088
  }
852
1089
  return name;
853
1090
  }
1091
+ function normalizeXmlName(name, options) {
1092
+ if (options.stripNamespaces) {
1093
+ return stripNamespace(name);
1094
+ }
1095
+ return name;
1096
+ }
854
1097
  function normalizeAttributes(attributes, options) {
855
1098
  const result = /* @__PURE__ */ Object.create(null);
856
1099
  for (const [key, attr] of Object.entries(attributes)) {
857
1100
  const name = normalizeName(key, options);
858
- result[name] = attr.value;
1101
+ result[name] = typeof attr === "string" ? attr : attr.value;
859
1102
  }
860
1103
  return result;
861
1104
  }
@@ -890,6 +1133,151 @@ function shouldForceArray(name, path, options) {
890
1133
  }
891
1134
  return rule(name, path);
892
1135
  }
1136
/**
 * Chooses the root element name and the value that becomes its content.
 *
 * When `obj` is a record with exactly one key, that key names the root and
 * its value is the content. In every other case `options.rootName` names the
 * root and `obj` itself is the content.
 *
 * @param {*} obj - Value being converted.
 * @param {object} options - Only `rootName` is read here.
 * @returns {{name: string, value: *}} Root name and content value.
 * @throws {Error} When the single-key shortcut does not apply and
 *   `options.rootName` is falsy.
 */
function resolveRoot(obj, options) {
  const keys = isRecord(obj) ? Object.keys(obj) : [];
  if (keys.length === 1) {
    const [name = ""] = keys;
    return { name, value: obj[name] };
  }
  const { rootName } = options;
  if (!rootName) {
    throw new Error("Root element name is required when object has multiple keys");
  }
  return { name: rootName, value: obj };
}
1149
/**
 * Builds an XML element node from a JavaScript value.
 *
 * - Array: every item is appended as content through `appendContent`.
 * - String, number, boolean or `Date`: becomes a single text child (converted
 *   by `coerceText`).
 * - Other record: keys recognised by `isAttributeKey` become attributes (with
 *   the prefix removed), the `options.textKey` entry becomes text, and every
 *   remaining key becomes one or more child elements.
 * - Anything else (for example `null`): an element with no attributes and no
 *   children.
 *
 * @param {string} name - Element name, used as given.
 * @param {*} value - Content of the element.
 * @param {object} options - Settings; `attributePrefix` and `textKey` are read
 *   here and the object is forwarded to the helpers.
 * @param {string[]} path - Names of the ancestor elements; not mutated.
 * @returns {object} Node assembled by `finalizeNode`.
 */
function buildElement(name, value, options, path) {
  const attributes = /* @__PURE__ */ Object.create(null);
  const children = [];
  const nextPath = [...path, name];
  if (Array.isArray(value)) {
    for (const item of value) {
      appendContent(children, item, options, nextPath);
    }
    return finalizeNode(name, attributes, children);
  }
  // A Date is an object with no own enumerable properties, so without this
  // check it would reach the record branch below and yield an empty element.
  // coerceText already knows how to render a Date, so treat it as text.
  if (isPrimitive(value) || value instanceof Date) {
    const text = coerceText(value);
    if (text !== null) {
      children.push(text);
    }
    return finalizeNode(name, attributes, children);
  }
  if (isRecord(value)) {
    for (const [key, entryValue] of Object.entries(value)) {
      if (isAttributeKey(key, options)) {
        const attrName = normalizeXmlName(key.slice(options.attributePrefix.length), options);
        const attrValue = coerceText(entryValue);
        // Values coerceText cannot render are left out of the attribute map.
        if (attrValue !== null) {
          attributes[attrName] = attrValue;
        }
        continue;
      }
      if (key === options.textKey) {
        appendText(children, entryValue, options);
        continue;
      }
      const childName = normalizeXmlName(key, options);
      addChildElements(children, childName, entryValue, options, nextPath);
    }
  }
  return finalizeNode(name, attributes, children);
}
1186
/**
 * Appends one child element per item of `value` to `children`.
 *
 * An array contributes one element per entry; any other value contributes
 * exactly one element. `null` and `undefined` items become a bare `{ name }`
 * node; every other item is built with `buildElement`.
 *
 * The original computed `shouldForceArray(...)` and then chose between two
 * identical `[value]` arms, so the result never influenced the output. The
 * call and the dead conditional are removed.
 *
 * @param {Array} children - Child list of the parent element; mutated in place.
 * @param {string} name - Element name for every appended child.
 * @param {*} value - Single value or array of values.
 * @param {object} options - Settings forwarded to `buildElement`.
 * @param {string[]} path - Ancestor path forwarded to `buildElement`.
 * @returns {void}
 */
function addChildElements(children, name, value, options, path) {
  const items = Array.isArray(value) ? value : [value];
  for (const item of items) {
    if (item === void 0 || item === null) {
      children.push({ name });
      continue;
    }
    children.push(buildElement(name, item, options, path));
  }
}
1197
/**
 * Appends the content represented by `value` to `children`.
 *
 * - `null` / `undefined`: nothing is appended.
 * - Array: each item is appended recursively.
 * - `Date`: appended as its ISO-8601 string, which is also how `coerceText`
 *   renders a Date.
 * - String, number, boolean: appended as text via `coerceText`.
 * - Other record: every entry becomes child element(s) named after its key.
 *
 * @param {Array} children - Child list being filled; mutated in place.
 * @param {*} value - Content to append.
 * @param {object} options - Settings forwarded to the helpers.
 * @param {string[]} path - Ancestor path forwarded to `addChildElements`.
 * @returns {void}
 */
function appendContent(children, value, options, path) {
  if (value === void 0 || value === null) {
    return;
  }
  if (Array.isArray(value)) {
    for (const item of value) {
      appendContent(children, item, options, path);
    }
    return;
  }
  // A Date is an object with no own enumerable properties, so without this
  // branch it would reach the record branch below and be silently dropped.
  if (value instanceof Date) {
    children.push(value.toISOString());
    return;
  }
  if (isPrimitive(value)) {
    const text = coerceText(value);
    if (text !== null) {
      children.push(text);
    }
    return;
  }
  if (isRecord(value)) {
    for (const [key, entryValue] of Object.entries(value)) {
      const childName = normalizeXmlName(key, options);
      addChildElements(children, childName, entryValue, options, path);
    }
  }
}
1221
/**
 * Appends the text represented by `value` to `children`.
 *
 * A non-array value is converted with `coerceText` and appended unless the
 * result is `null`. For an array, every item is converted and the `null`
 * results are discarded; the remaining parts are appended as one joined
 * string when `options.coalesceText` is truthy, otherwise one by one.
 * `null`, `undefined` and arrays with no usable parts append nothing.
 *
 * @param {Array} children - Child list being filled; mutated in place.
 * @param {*} value - Text value or array of text values.
 * @param {object} options - Only `coalesceText` is read here.
 * @returns {void}
 */
function appendText(children, value, options) {
  if (value === void 0 || value === null) {
    return;
  }
  if (!Array.isArray(value)) {
    const single = coerceText(value);
    if (single !== null) {
      children.push(single);
    }
    return;
  }
  const pieces = [];
  for (const entry of value) {
    const piece = coerceText(entry);
    if (piece !== null) {
      pieces.push(piece);
    }
  }
  if (pieces.length === 0) {
    return;
  }
  if (options.coalesceText) {
    children.push(pieces.join(""));
  } else {
    for (const piece of pieces) {
      children.push(piece);
    }
  }
}
1244
/**
 * Assembles a node object, leaving out empty parts.
 *
 * @param {string} name - Element name.
 * @param {object} attributes - Attribute map; attached only when it has at
 *   least one own enumerable key.
 * @param {Array} children - Child list; attached only when non-empty.
 * @returns {object} `{ name }` plus optional `attributes` and `children`.
 */
function finalizeNode(name, attributes, children) {
  const hasAttributes = Object.keys(attributes).length > 0;
  const hasChildren = children.length > 0;
  return {
    name,
    ...(hasAttributes ? { attributes } : {}),
    ...(hasChildren ? { children } : {}),
  };
}
1254
/**
 * Tells whether an object key denotes an XML attribute.
 *
 * @param {string} key - Object key to inspect.
 * @param {object} options - Only `attributePrefix` is read here.
 * @returns {boolean} `true` when a prefix is configured, `key` starts with it
 *   and `key` is longer than the prefix (so a non-empty name remains).
 */
function isAttributeKey(key, options) {
  const prefix = options.attributePrefix;
  return Boolean(prefix) && key.startsWith(prefix) && key.length > prefix.length;
}
1260
/**
 * Tells whether a value is a non-null, non-array object.
 *
 * @param {*} value - Value to test.
 * @returns {boolean} `true` for any object that is neither `null` nor an array.
 */
function isRecord(value) {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return !Array.isArray(value);
}
1263
/**
 * Tells whether a value is a string, a number or a boolean.
 *
 * @param {*} value - Value to test.
 * @returns {boolean} `true` only for those three `typeof` results.
 */
function isPrimitive(value) {
  return ["string", "number", "boolean"].includes(typeof value);
}
1266
/**
 * Converts a value to its text form, or reports that it has none.
 *
 * @param {*} value - Value to convert.
 * @returns {string|null} The string itself; `String(value)` for numbers and
 *   booleans; `toISOString()` for `Date` instances; `null` for everything
 *   else, including `null` and `undefined`.
 */
function coerceText(value) {
  switch (typeof value) {
    case "string":
      return value;
    case "number":
    case "boolean":
      return String(value);
    default:
      // Covers null/undefined and every non-Date value: instanceof is false for them.
      return value instanceof Date ? value.toISOString() : null;
  }
}
893
1281
  function finalizeElement(state, options) {
894
1282
  const hasAttributes = Object.keys(state.attributes).length > 0;
895
1283
  const hasChildren = Object.keys(state.children).length > 0;
@@ -919,6 +1307,8 @@ exports.TreeBuilder = TreeBuilder;
919
1307
  exports.XmlSaxError = XmlSaxError;
920
1308
  exports.XmlSaxParser = XmlSaxParser;
921
1309
  exports.buildObject = buildObject;
1310
+ exports.buildXmlNode = buildXmlNode;
1311
+ exports.objectToXml = objectToXml;
922
1312
  exports.parseXmlString = parseXmlString;
923
1313
  exports.resolveName = resolveName;
924
1314
  exports.serializeXml = serializeXml;