@eksml/xml 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/LICENSE +22 -0
  2. package/README.md +588 -0
  3. package/dist/converters/fromLossless.d.mts +14 -0
  4. package/dist/converters/fromLossless.d.mts.map +1 -0
  5. package/dist/converters/fromLossless.mjs +35 -0
  6. package/dist/converters/fromLossless.mjs.map +1 -0
  7. package/dist/converters/fromLossy.d.mts +18 -0
  8. package/dist/converters/fromLossy.d.mts.map +1 -0
  9. package/dist/converters/fromLossy.mjs +91 -0
  10. package/dist/converters/fromLossy.mjs.map +1 -0
  11. package/dist/converters/lossless.d.mts +39 -0
  12. package/dist/converters/lossless.d.mts.map +1 -0
  13. package/dist/converters/lossless.mjs +74 -0
  14. package/dist/converters/lossless.mjs.map +1 -0
  15. package/dist/converters/lossy.d.mts +42 -0
  16. package/dist/converters/lossy.d.mts.map +1 -0
  17. package/dist/converters/lossy.mjs +158 -0
  18. package/dist/converters/lossy.mjs.map +1 -0
  19. package/dist/htmlConstants-D6fsKbZ-.mjs +30 -0
  20. package/dist/htmlConstants-D6fsKbZ-.mjs.map +1 -0
  21. package/dist/parser-BfdEfWDg.d.mts +95 -0
  22. package/dist/parser-BfdEfWDg.d.mts.map +1 -0
  23. package/dist/parser-CYq309aR.mjs +479 -0
  24. package/dist/parser-CYq309aR.mjs.map +1 -0
  25. package/dist/parser.d.mts +2 -0
  26. package/dist/parser.mjs +2 -0
  27. package/dist/sax.d.mts +64 -0
  28. package/dist/sax.d.mts.map +1 -0
  29. package/dist/sax.mjs +70 -0
  30. package/dist/sax.mjs.map +1 -0
  31. package/dist/saxEngine-BDnD7ruG.mjs +750 -0
  32. package/dist/saxEngine-BDnD7ruG.mjs.map +1 -0
  33. package/dist/utilities/index.d.mts +88 -0
  34. package/dist/utilities/index.d.mts.map +1 -0
  35. package/dist/utilities/index.mjs +87 -0
  36. package/dist/utilities/index.mjs.map +1 -0
  37. package/dist/writer.d.mts +58 -0
  38. package/dist/writer.d.mts.map +1 -0
  39. package/dist/writer.mjs +357 -0
  40. package/dist/writer.mjs.map +1 -0
  41. package/dist/xmlParseStream.d.mts +138 -0
  42. package/dist/xmlParseStream.d.mts.map +1 -0
  43. package/dist/xmlParseStream.mjs +313 -0
  44. package/dist/xmlParseStream.mjs.map +1 -0
  45. package/package.json +100 -0
  46. package/src/converters/fromLossless.ts +80 -0
  47. package/src/converters/fromLossy.ts +180 -0
  48. package/src/converters/lossless.ts +116 -0
  49. package/src/converters/lossy.ts +274 -0
  50. package/src/parser.ts +728 -0
  51. package/src/sax.ts +157 -0
  52. package/src/saxEngine.ts +1157 -0
  53. package/src/utilities/escapeRegExp.ts +19 -0
  54. package/src/utilities/filter.ts +63 -0
  55. package/src/utilities/getElementById.ts +21 -0
  56. package/src/utilities/getElementsByClassName.ts +22 -0
  57. package/src/utilities/htmlConstants.ts +26 -0
  58. package/src/utilities/index.ts +7 -0
  59. package/src/utilities/isElementNode.ts +19 -0
  60. package/src/utilities/isTextNode.ts +19 -0
  61. package/src/utilities/toContentString.ts +23 -0
  62. package/src/writer.ts +650 -0
  63. package/src/xmlParseStream.ts +597 -0
@@ -0,0 +1,750 @@
1
//#region src/saxEngine.ts
/**
 * saxEngine — a high-performance, synchronous, event-based streaming XML parser.
 *
 * This is an internal module used by `createSaxParser` and `XmlParseStream`.
 * It is not part of the public API.
 *
 * Architecture: single-pass state machine with batch scanning. Each character is
 * consumed exactly once. Within a chunk, hot-path states (text, tag names,
 * attribute names/values, close tags) scan ahead with indexOf / charCodeAt loops
 * to extract tokens via a single substring() rather than per-character +=.
 */

// UTF-16 code units of the characters the state machine branches on.
const GT = 62; // ">"
const SLASH = 47; // "/"
const BANG = 33; // "!"
const QUESTION = 63; // "?"
const EQ = 61; // "="
const LBRACKET = 91; // "["
const RBRACKET = 93; // "]"
const SQUOTE = 39; // "'"
const DQUOTE = 34; // '"'
const TAB = 9;
const LF = 10;
const CR = 13;
const SPACE = 32;
const DASH = 45; // "-"
const UPPER_C = 67;
const UPPER_D = 68;
const UPPER_A = 65;
const UPPER_T = 84;

/** Parser states. The numeric values are only compared for identity. */
const State = {
  TEXT: 0,
  TAG_OPEN: 1,
  OPEN_TAG_NAME: 2,
  OPEN_TAG_BODY: 3,
  ATTR_NAME: 4,
  ATTR_AFTER_NAME: 5,
  ATTR_AFTER_EQ: 6,
  ATTR_VALUE_DQ: 7,
  ATTR_VALUE_SQ: 8,
  ATTR_VALUE_UQ: 9,
  CLOSE_TAG: 10,
  SELF_CLOSING: 11,
  COMMENT_1: 12,
  COMMENT: 13,
  COMMENT_END1: 14,
  COMMENT_END2: 15,
  CDATA_1: 16,
  CDATA_2: 17,
  CDATA_3: 18,
  CDATA_4: 19,
  CDATA_5: 20,
  CDATA_6: 21,
  CDATA: 22,
  CDATA_END1: 23,
  CDATA_END2: 24,
  PI: 25,
  PI_END: 26,
  DOCTYPE: 27,
  DOCTYPE_BRACKET: 28,
  BANG_START: 29,
  RAW_TEXT: 30,
  RAW_END_1: 31,
  RAW_END_2: 32,
  RAW_END_3: 33
};

/**
 * Create a streaming SAX-style parser.
 *
 * All callbacks are optional and are invoked synchronously from `write()` /
 * `close()`:
 *  - `onOpenTag(name, attributes)` — `attributes` is a null-prototype object;
 *    valueless attributes map to `null`.
 *  - `onCloseTag(name)`
 *  - `onText(text)` — regular text is whitespace-trimmed and skipped when empty;
 *    raw-content text is delivered untrimmed.
 *  - `onCdata(content)` — content between `<![CDATA[` and `]]>`.
 *  - `onComment(comment)` — the full comment including `<!--` and `-->`.
 *  - `onProcessingInstruction(name, body)`
 *  - `onDoctype(tagName, attributes)`
 *
 * Other options:
 *  - `selfClosingTags` — names that get an immediate close event after opening.
 *  - `rawContentTags` — names whose content is collected verbatim until the
 *    matching close tag.
 *  - `maxBufferSize` — when set, `write()` throws a `RangeError` if any buffer
 *    that is still accumulating after the chunk exceeds this many characters.
 *
 * @param {object} [options]
 * @returns {{ write(chunk: string): void, close(): void }}
 */
function saxEngine(options = {}) {
  const {
    onOpenTag,
    onCloseTag,
    onText,
    onCdata,
    onComment,
    onProcessingInstruction,
    onDoctype,
    selfClosingTags = [],
    rawContentTags = [],
    maxBufferSize
  } = options;

  // Sets are only built when needed so the common case is a cheap null check.
  const voidSet = selfClosingTags.length > 0 ? new Set(selfClosingTags) : null;
  const rawSet = rawContentTags.length > 0 ? new Set(rawContentTags) : null;

  let state = State.TEXT;
  let text = "";
  let tagName = "";
  let attributeName = "";
  let attributeValue = "";
  let attributes = Object.create(null);
  // Shared accumulator for comments, CDATA, processing instructions and doctype.
  let special = "";
  // Raw-content mode bookkeeping.
  let rawTag = "";
  let rawText = "";
  let rawCloseTagMatchIndex = 0;
  let rawCloseTagTrailing = "";

  /** True for space, tab, line feed and carriage return. */
  function isWhitespace(charCode) {
    return charCode === SPACE || charCode === TAB || charCode === LF || charCode === CR;
  }

  /** Strip leading/trailing whitespace; returns `input` itself when nothing is trimmed. */
  function trimWhitespace(input) {
    let startIndex = 0;
    let endIndex = input.length - 1;
    while (startIndex <= endIndex && isWhitespace(input.charCodeAt(startIndex))) startIndex++;
    while (endIndex >= startIndex && isWhitespace(input.charCodeAt(endIndex))) endIndex--;
    if (startIndex === 0 && endIndex === input.length - 1) return input;
    return input.substring(startIndex, endIndex + 1);
  }

  /** Flush the pending text buffer, emitting it trimmed when non-empty. */
  function emitText() {
    if (text.length === 0) return;
    if (onText) {
      const trimmed = trimWhitespace(text);
      if (trimmed.length > 0) onText(trimmed);
    }
    text = "";
  }

  /**
   * Parse the accumulated DOCTYPE body (everything between `<!` and `>`,
   * excluding internal DTD subsets) and emit an onDoctype event.
   *
   * The body string starts with the declaration keyword (e.g. "DOCTYPE html ...")
   * after the `!`. We prepend `!` to form the tagName (e.g. "!DOCTYPE"), then
   * parse the remaining space-separated tokens as null-valued attributes.
   * Quoted strings are unquoted and stored as attribute keys.
   *
   * Callers must check that `onDoctype` is defined before calling.
   */
  function emitDoctype(body) {
    const bodyLength = body.length;
    let i = 0;
    while (i < bodyLength && !isWhitespace(body.charCodeAt(i))) i++;
    const doctypeName = "!" + body.substring(0, i);
    const doctypeAttributes = Object.create(null);
    while (i < bodyLength) {
      const charCode = body.charCodeAt(i);
      if (isWhitespace(charCode)) {
        i++;
        continue;
      }
      if (charCode === DQUOTE || charCode === SQUOTE) {
        const quoteChar = charCode === DQUOTE ? "\"" : "'";
        const closeIndex = body.indexOf(quoteChar, i + 1);
        if (closeIndex === -1) {
          // Unterminated quote: take the rest of the body as the token.
          doctypeAttributes[body.substring(i + 1)] = null;
          break;
        }
        doctypeAttributes[body.substring(i + 1, closeIndex)] = null;
        i = closeIndex + 1;
        continue;
      }
      const tokenStart = i;
      while (i < bodyLength && !isWhitespace(body.charCodeAt(i))) i++;
      doctypeAttributes[body.substring(tokenStart, i)] = null;
    }
    onDoctype(doctypeName, doctypeAttributes);
  }

  /**
   * Split an accumulated processing-instruction body at its first whitespace
   * into (name, trimmed body) and emit it. Callers must check that
   * `onProcessingInstruction` is defined.
   */
  function emitProcessingInstruction(inner) {
    let whitespaceIndex = -1;
    for (let j = 0; j < inner.length; j++) {
      if (isWhitespace(inner.charCodeAt(j))) {
        whitespaceIndex = j;
        break;
      }
    }
    if (whitespaceIndex === -1) {
      onProcessingInstruction(inner, "");
      return;
    }
    onProcessingInstruction(
      inner.substring(0, whitespaceIndex),
      trimWhitespace(inner.substring(whitespaceIndex + 1))
    );
  }

  /** After we finish parsing an open tag's `>`, handle void/raw transitions. */
  function finishOpenTag() {
    if (onOpenTag) onOpenTag(tagName, attributes);
    if (voidSet !== null && voidSet.has(tagName)) {
      if (onCloseTag) onCloseTag(tagName);
    } else if (rawSet !== null && rawSet.has(tagName)) {
      rawTag = tagName;
      rawText = "";
      rawCloseTagMatchIndex = 0;
      state = State.RAW_TEXT;
    }
  }

  /** Emit the collected raw text (untrimmed) plus the close event, then leave raw mode. */
  function emitRawClose() {
    if (onText && rawText.length > 0) onText(rawText);
    if (onCloseTag) onCloseTag(rawTag);
    rawText = "";
    rawTag = "";
    rawCloseTagTrailing = "";
    state = State.TEXT;
  }

  /**
   * Returns true if `charCode` is a tag-name-ending character:
   * `>`, `/`, `=`, or whitespace.
   */
  function isNameEnd(charCode) {
    return charCode === GT || charCode === SLASH || charCode === EQ || isWhitespace(charCode);
  }

  /**
   * Enforce `maxBufferSize` after a chunk has been processed.
   *
   * The first four buffers are checked unconditionally. Tag names, attribute
   * names and raw close-tag whitespace also accumulate across chunks, so they
   * are checked too — but only while the parser is still inside that token, so
   * a completed name left over from an earlier tag never triggers a throw.
   *
   * @throws {RangeError} when an accumulating buffer exceeds `maxBufferSize`.
   */
  function checkBufferLimits() {
    if (maxBufferSize === undefined) return;
    let overflowing = null;
    if (text.length > maxBufferSize) overflowing = "text";
    else if (attributeValue.length > maxBufferSize) overflowing = "attribute value";
    else if (special.length > maxBufferSize) overflowing = "special";
    else if (rawText.length > maxBufferSize) overflowing = "raw text";
    else if ((state === State.OPEN_TAG_NAME || state === State.CLOSE_TAG) && tagName.length > maxBufferSize) overflowing = "tag name";
    else if (state === State.ATTR_NAME && attributeName.length > maxBufferSize) overflowing = "attribute name";
    else if (state === State.RAW_END_3 && rawCloseTagTrailing.length > maxBufferSize) overflowing = "raw close tag";
    if (overflowing !== null) {
      throw new RangeError(`Buffer overflow: ${overflowing} buffer exceeded maxBufferSize (${maxBufferSize})`);
    }
  }

  /**
   * Run the state machine over one chunk. A state that does not advance `i`
   * hands the current character to the state it just switched to.
   */
  function processChunk(chunk) {
    const chunkLength = chunk.length;
    let i = 0;
    while (i < chunkLength) {
      switch (state) {
        case State.TEXT: {
          const lessThanIndex = chunk.indexOf("<", i);
          if (lessThanIndex === -1) {
            text += i === 0 ? chunk : chunk.substring(i);
            i = chunkLength;
          } else {
            if (lessThanIndex > i) text += chunk.substring(i, lessThanIndex);
            emitText();
            state = State.TAG_OPEN;
            i = lessThanIndex + 1;
          }
          continue;
        }
        case State.TAG_OPEN: {
          const charCode = chunk.charCodeAt(i);
          if (charCode === SLASH) {
            state = State.CLOSE_TAG;
            tagName = "";
            i++;
          } else if (charCode === BANG) {
            state = State.BANG_START;
            special = "";
            i++;
          } else if (charCode === QUESTION) {
            state = State.PI;
            special = "";
            i++;
          } else {
            // First character of the tag name: re-read it in OPEN_TAG_NAME.
            state = State.OPEN_TAG_NAME;
            tagName = "";
            attributes = Object.create(null);
          }
          continue;
        }
        case State.OPEN_TAG_NAME: {
          let j = i;
          while (j < chunkLength && !isNameEnd(chunk.charCodeAt(j))) j++;
          if (j > i) tagName += chunk.substring(i, j);
          if (j >= chunkLength) {
            // Name may continue in the next chunk.
            i = chunkLength;
            continue;
          }
          const charCode = chunk.charCodeAt(j);
          i = j + 1;
          if (charCode === GT) {
            state = State.TEXT;
            finishOpenTag();
          } else if (charCode === SLASH) {
            state = State.SELF_CLOSING;
          } else {
            state = State.OPEN_TAG_BODY;
          }
          continue;
        }
        case State.OPEN_TAG_BODY: {
          const charCode = chunk.charCodeAt(i);
          if (charCode === GT) {
            state = State.TEXT;
            i++;
            finishOpenTag();
          } else if (charCode === SLASH) {
            state = State.SELF_CLOSING;
            i++;
          } else if (isWhitespace(charCode)) {
            i++;
          } else {
            state = State.ATTR_NAME;
            attributeName = "";
          }
          continue;
        }
        case State.ATTR_NAME: {
          let j = i;
          while (j < chunkLength && !isNameEnd(chunk.charCodeAt(j))) j++;
          if (j > i) attributeName += chunk.substring(i, j);
          if (j >= chunkLength) {
            i = chunkLength;
            continue;
          }
          const charCode = chunk.charCodeAt(j);
          i = j + 1;
          if (charCode === EQ) {
            state = State.ATTR_AFTER_EQ;
          } else if (charCode === GT) {
            attributes[attributeName] = null;
            state = State.TEXT;
            finishOpenTag();
          } else if (charCode === SLASH) {
            attributes[attributeName] = null;
            state = State.SELF_CLOSING;
          } else {
            state = State.ATTR_AFTER_NAME;
          }
          continue;
        }
        case State.ATTR_AFTER_NAME: {
          const charCode = chunk.charCodeAt(i);
          if (charCode === EQ) {
            state = State.ATTR_AFTER_EQ;
            i++;
          } else if (isWhitespace(charCode)) {
            i++;
          } else if (charCode === GT) {
            attributes[attributeName] = null;
            state = State.TEXT;
            i++;
            finishOpenTag();
          } else if (charCode === SLASH) {
            attributes[attributeName] = null;
            state = State.SELF_CLOSING;
            i++;
          } else {
            // A new attribute starts: the previous one had no value.
            attributes[attributeName] = null;
            state = State.ATTR_NAME;
            attributeName = "";
          }
          continue;
        }
        case State.ATTR_AFTER_EQ: {
          const charCode = chunk.charCodeAt(i);
          if (charCode === DQUOTE) {
            state = State.ATTR_VALUE_DQ;
            attributeValue = "";
            i++;
          } else if (charCode === SQUOTE) {
            state = State.ATTR_VALUE_SQ;
            attributeValue = "";
            i++;
          } else if (isWhitespace(charCode)) {
            i++;
          } else if (charCode === GT) {
            attributes[attributeName] = "";
            state = State.TEXT;
            i++;
            finishOpenTag();
          } else {
            state = State.ATTR_VALUE_UQ;
            attributeValue = "";
          }
          continue;
        }
        case State.ATTR_VALUE_DQ: {
          const quoteIndex = chunk.indexOf("\"", i);
          if (quoteIndex === -1) {
            attributeValue += i === 0 ? chunk : chunk.substring(i);
            i = chunkLength;
          } else {
            if (quoteIndex > i) attributeValue += chunk.substring(i, quoteIndex);
            attributes[attributeName] = attributeValue;
            state = State.OPEN_TAG_BODY;
            i = quoteIndex + 1;
          }
          continue;
        }
        case State.ATTR_VALUE_SQ: {
          const quoteIndex = chunk.indexOf("'", i);
          if (quoteIndex === -1) {
            attributeValue += i === 0 ? chunk : chunk.substring(i);
            i = chunkLength;
          } else {
            if (quoteIndex > i) attributeValue += chunk.substring(i, quoteIndex);
            attributes[attributeName] = attributeValue;
            state = State.OPEN_TAG_BODY;
            i = quoteIndex + 1;
          }
          continue;
        }
        case State.ATTR_VALUE_UQ: {
          let j = i;
          while (j < chunkLength) {
            const scanCharCode = chunk.charCodeAt(j);
            if (isWhitespace(scanCharCode) || scanCharCode === GT || scanCharCode === SLASH) break;
            j++;
          }
          if (j > i) attributeValue += chunk.substring(i, j);
          if (j >= chunkLength) {
            i = chunkLength;
            continue;
          }
          const charCode = chunk.charCodeAt(j);
          attributes[attributeName] = attributeValue;
          i = j + 1;
          if (charCode === GT) {
            state = State.TEXT;
            finishOpenTag();
          } else if (charCode === SLASH) {
            state = State.SELF_CLOSING;
          } else {
            state = State.OPEN_TAG_BODY;
          }
          continue;
        }
        case State.CLOSE_TAG: {
          const greaterThanIndex = chunk.indexOf(">", i);
          if (greaterThanIndex === -1) {
            tagName += i === 0 ? chunk : chunk.substring(i);
            i = chunkLength;
          } else {
            if (greaterThanIndex > i) tagName += chunk.substring(i, greaterThanIndex);
            if (onCloseTag) onCloseTag(trimWhitespace(tagName));
            state = State.TEXT;
            i = greaterThanIndex + 1;
          }
          continue;
        }
        case State.SELF_CLOSING:
          if (chunk.charCodeAt(i) === GT) {
            state = State.TEXT;
            i++;
            if (onOpenTag) onOpenTag(tagName, attributes);
            if (onCloseTag) onCloseTag(tagName);
          } else {
            // The "/" was not followed by ">": keep parsing the tag body.
            state = State.OPEN_TAG_BODY;
          }
          continue;
        case State.BANG_START: {
          const charCode = chunk.charCodeAt(i);
          if (charCode === DASH) {
            state = State.COMMENT_1;
            special = "<!-";
            i++;
          } else if (charCode === LBRACKET) {
            state = State.CDATA_1;
            i++;
          } else {
            state = State.DOCTYPE;
            special = "";
          }
          continue;
        }
        case State.COMMENT_1:
          if (chunk.charCodeAt(i) === DASH) {
            state = State.COMMENT;
            special = "<!--";
            i++;
          } else {
            // Not a comment after all: treat "<!-…" as a declaration.
            special = "-";
            state = State.DOCTYPE;
          }
          continue;
        case State.COMMENT: {
          const dashIndex = chunk.indexOf("-", i);
          if (dashIndex === -1) {
            special += i === 0 ? chunk : chunk.substring(i);
            i = chunkLength;
          } else {
            if (dashIndex > i) special += chunk.substring(i, dashIndex);
            special += "-";
            state = State.COMMENT_END1;
            i = dashIndex + 1;
          }
          continue;
        }
        case State.COMMENT_END1:
          if (chunk.charCodeAt(i) === DASH) {
            state = State.COMMENT_END2;
            special += "-";
          } else {
            state = State.COMMENT;
            special += chunk[i];
          }
          i++;
          continue;
        case State.COMMENT_END2: {
          const charCode = chunk.charCodeAt(i);
          if (charCode === GT) {
            special += ">";
            if (onComment) onComment(special);
            special = "";
            state = State.TEXT;
          } else if (charCode === DASH) {
            // Extra dashes before the terminator stay part of the comment.
            special += "-";
          } else {
            state = State.COMMENT;
            special += chunk[i];
          }
          i++;
          continue;
        }
        // CDATA_1..CDATA_6 match the literal "CDATA[" one character at a time;
        // on mismatch the matched prefix is replayed into a declaration body.
        case State.CDATA_1:
          if (chunk.charCodeAt(i) === UPPER_C) {
            state = State.CDATA_2;
            i++;
          } else {
            special = "[";
            state = State.DOCTYPE;
          }
          continue;
        case State.CDATA_2:
          if (chunk.charCodeAt(i) === UPPER_D) {
            state = State.CDATA_3;
            i++;
          } else {
            special = "[C";
            state = State.DOCTYPE;
          }
          continue;
        case State.CDATA_3:
          if (chunk.charCodeAt(i) === UPPER_A) {
            state = State.CDATA_4;
            i++;
          } else {
            special = "[CD";
            state = State.DOCTYPE;
          }
          continue;
        case State.CDATA_4:
          if (chunk.charCodeAt(i) === UPPER_T) {
            state = State.CDATA_5;
            i++;
          } else {
            special = "[CDA";
            state = State.DOCTYPE;
          }
          continue;
        case State.CDATA_5:
          if (chunk.charCodeAt(i) === UPPER_A) {
            state = State.CDATA_6;
            i++;
          } else {
            special = "[CDAT";
            state = State.DOCTYPE;
          }
          continue;
        case State.CDATA_6:
          if (chunk.charCodeAt(i) === LBRACKET) {
            state = State.CDATA;
            special = "";
            i++;
          } else {
            special = "[CDATA";
            state = State.DOCTYPE;
          }
          continue;
        case State.CDATA: {
          const bracketIndex = chunk.indexOf("]", i);
          if (bracketIndex === -1) {
            special += i === 0 ? chunk : chunk.substring(i);
            i = chunkLength;
          } else {
            if (bracketIndex > i) special += chunk.substring(i, bracketIndex);
            state = State.CDATA_END1;
            i = bracketIndex + 1;
          }
          continue;
        }
        case State.CDATA_END1:
          if (chunk.charCodeAt(i) === RBRACKET) {
            state = State.CDATA_END2;
          } else {
            // Lone "]" was content; restore it together with this character.
            special += "]" + chunk[i];
            state = State.CDATA;
          }
          i++;
          continue;
        case State.CDATA_END2: {
          const charCode = chunk.charCodeAt(i);
          if (charCode === GT) {
            if (onCdata) onCdata(special);
            special = "";
            state = State.TEXT;
          } else if (charCode === RBRACKET) {
            // "]]]": the oldest bracket is content, the last two may still terminate.
            special += "]";
          } else {
            special += "]]" + chunk[i];
            state = State.CDATA;
          }
          i++;
          continue;
        }
        case State.PI: {
          const questionMarkIndex = chunk.indexOf("?", i);
          if (questionMarkIndex === -1) {
            special += i === 0 ? chunk : chunk.substring(i);
            i = chunkLength;
          } else {
            if (questionMarkIndex > i) special += chunk.substring(i, questionMarkIndex);
            state = State.PI_END;
            i = questionMarkIndex + 1;
          }
          continue;
        }
        case State.PI_END:
          if (chunk.charCodeAt(i) === GT) {
            if (onProcessingInstruction) emitProcessingInstruction(special);
            special = "";
            state = State.TEXT;
            i++;
          } else {
            // The "?" was content; re-read this character in PI.
            special += "?";
            state = State.PI;
          }
          continue;
        case State.DOCTYPE: {
          const charCode = chunk.charCodeAt(i);
          if (charCode === GT) {
            if (onDoctype) emitDoctype(special);
            special = "";
            state = State.TEXT;
            i++;
          } else if (charCode === LBRACKET) {
            state = State.DOCTYPE_BRACKET;
            i++;
          } else {
            let j = i;
            while (j < chunkLength) {
              const scanCharCode = chunk.charCodeAt(j);
              if (scanCharCode === GT || scanCharCode === LBRACKET) break;
              j++;
            }
            special += chunk.substring(i, j);
            i = j;
          }
          continue;
        }
        case State.DOCTYPE_BRACKET:
          // Internal DTD subset content is skipped, not accumulated.
          if (chunk.charCodeAt(i) === RBRACKET) state = State.DOCTYPE;
          i++;
          continue;
        case State.RAW_TEXT: {
          const lessThanIndex = chunk.indexOf("<", i);
          if (lessThanIndex === -1) {
            rawText += i === 0 ? chunk : chunk.substring(i);
            i = chunkLength;
          } else {
            if (lessThanIndex > i) rawText += chunk.substring(i, lessThanIndex);
            state = State.RAW_END_1;
            i = lessThanIndex + 1;
          }
          continue;
        }
        case State.RAW_END_1:
          if (chunk.charCodeAt(i) === SLASH) {
            state = State.RAW_END_2;
            rawCloseTagMatchIndex = 0;
            i++;
          } else {
            rawText += "<";
            state = State.RAW_TEXT;
          }
          continue;
        case State.RAW_END_2:
          if (rawCloseTagMatchIndex < rawTag.length) {
            if (chunk[i] === rawTag[rawCloseTagMatchIndex]) {
              rawCloseTagMatchIndex++;
              i++;
            } else {
              // Partial match only: what was consumed so far is raw content.
              rawText += "</" + rawTag.substring(0, rawCloseTagMatchIndex);
              state = State.RAW_TEXT;
            }
          } else {
            const charCode = chunk.charCodeAt(i);
            if (charCode === GT) {
              emitRawClose();
              i++;
            } else if (isWhitespace(charCode)) {
              rawCloseTagTrailing = chunk[i];
              state = State.RAW_END_3;
              i++;
            } else {
              // e.g. "</scriptx": a longer name, so this is still content.
              rawText += "</" + rawTag;
              state = State.RAW_TEXT;
            }
          }
          continue;
        case State.RAW_END_3: {
          const charCode = chunk.charCodeAt(i);
          if (charCode === GT) {
            emitRawClose();
            i++;
          } else if (isWhitespace(charCode)) {
            rawCloseTagTrailing += chunk[i];
            i++;
          } else {
            rawText += "</" + rawTag + rawCloseTagTrailing;
            rawCloseTagTrailing = "";
            state = State.RAW_TEXT;
          }
          continue;
        }
        default:
          i++;
          continue;
      }
    }
  }

  return {
    /**
     * Feed the next chunk of input. Empty chunks are ignored.
     * @throws {RangeError} when `maxBufferSize` is set and exceeded.
     */
    write(chunk) {
      if (chunk.length === 0) return;
      processChunk(chunk);
      checkBufferLimits();
    },
    /**
     * Signal end of input: flush pending text or raw content, then reset the
     * parser to its initial TEXT state. Input left inside an unfinished tag,
     * comment, CDATA section, PI or doctype is discarded.
     */
    close() {
      if (state === State.TEXT) {
        emitText();
      } else if (state === State.RAW_END_3) {
        // "</tag" plus whitespace was seen: treated as a completed close tag.
        emitRawClose();
      } else if (state === State.RAW_TEXT || state === State.RAW_END_1 || state === State.RAW_END_2) {
        // Restore any partially consumed "</tag" prefix as raw content.
        if (state === State.RAW_END_1) rawText += "<";
        else if (state === State.RAW_END_2) rawText += "</" + rawTag.substring(0, rawCloseTagMatchIndex);
        emitRawClose();
      }
      text = "";
      tagName = "";
      attributeName = "";
      attributeValue = "";
      special = "";
      state = State.TEXT;
    }
  };
}
//#endregion
export { saxEngine as t };
+
750
+ //# sourceMappingURL=saxEngine-BDnD7ruG.mjs.map