vite 6.0.0-beta.1 → 6.0.0-beta.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,3 @@
1
- import { fileURLToPath as __cjs_fileURLToPath } from 'node:url';
2
- import { dirname as __cjs_dirname } from 'node:path';
3
- import { createRequire as __cjs_createRequire } from 'node:module';
4
-
5
- const __filename = __cjs_fileURLToPath(import.meta.url);
6
- const __dirname = __cjs_dirname(__filename);
7
- const require = __cjs_createRequire(import.meta.url);
8
- const __require = require;
9
1
  const UNDEFINED_CODE_POINTS = new Set([
10
2
  65534, 65535, 131070, 131071, 196606, 196607, 262142, 262143, 327678, 327679, 393214,
11
3
  393215, 458750, 458751, 524286, 524287, 589822, 589823, 655358, 655359, 720894,
@@ -24,7 +16,6 @@ var CODE_POINTS;
24
16
  CODE_POINTS[CODE_POINTS["SPACE"] = 32] = "SPACE";
25
17
  CODE_POINTS[CODE_POINTS["EXCLAMATION_MARK"] = 33] = "EXCLAMATION_MARK";
26
18
  CODE_POINTS[CODE_POINTS["QUOTATION_MARK"] = 34] = "QUOTATION_MARK";
27
- CODE_POINTS[CODE_POINTS["NUMBER_SIGN"] = 35] = "NUMBER_SIGN";
28
19
  CODE_POINTS[CODE_POINTS["AMPERSAND"] = 38] = "AMPERSAND";
29
20
  CODE_POINTS[CODE_POINTS["APOSTROPHE"] = 39] = "APOSTROPHE";
30
21
  CODE_POINTS[CODE_POINTS["HYPHEN_MINUS"] = 45] = "HYPHEN_MINUS";
@@ -37,17 +28,12 @@ var CODE_POINTS;
37
28
  CODE_POINTS[CODE_POINTS["GREATER_THAN_SIGN"] = 62] = "GREATER_THAN_SIGN";
38
29
  CODE_POINTS[CODE_POINTS["QUESTION_MARK"] = 63] = "QUESTION_MARK";
39
30
  CODE_POINTS[CODE_POINTS["LATIN_CAPITAL_A"] = 65] = "LATIN_CAPITAL_A";
40
- CODE_POINTS[CODE_POINTS["LATIN_CAPITAL_F"] = 70] = "LATIN_CAPITAL_F";
41
- CODE_POINTS[CODE_POINTS["LATIN_CAPITAL_X"] = 88] = "LATIN_CAPITAL_X";
42
31
  CODE_POINTS[CODE_POINTS["LATIN_CAPITAL_Z"] = 90] = "LATIN_CAPITAL_Z";
43
32
  CODE_POINTS[CODE_POINTS["RIGHT_SQUARE_BRACKET"] = 93] = "RIGHT_SQUARE_BRACKET";
44
33
  CODE_POINTS[CODE_POINTS["GRAVE_ACCENT"] = 96] = "GRAVE_ACCENT";
45
34
  CODE_POINTS[CODE_POINTS["LATIN_SMALL_A"] = 97] = "LATIN_SMALL_A";
46
- CODE_POINTS[CODE_POINTS["LATIN_SMALL_F"] = 102] = "LATIN_SMALL_F";
47
- CODE_POINTS[CODE_POINTS["LATIN_SMALL_X"] = 120] = "LATIN_SMALL_X";
48
35
  CODE_POINTS[CODE_POINTS["LATIN_SMALL_Z"] = 122] = "LATIN_SMALL_Z";
49
- CODE_POINTS[CODE_POINTS["REPLACEMENT_CHARACTER"] = 65533] = "REPLACEMENT_CHARACTER";
50
- })(CODE_POINTS = CODE_POINTS || (CODE_POINTS = {}));
36
+ })(CODE_POINTS || (CODE_POINTS = {}));
51
37
  const SEQUENCES = {
52
38
  DASH_DASH: '--',
53
39
  CDATA_START: '[CDATA[',
@@ -137,7 +123,7 @@ var ERR;
137
123
  ERR["misplacedStartTagForHeadElement"] = "misplaced-start-tag-for-head-element";
138
124
  ERR["nestedNoscriptInHead"] = "nested-noscript-in-head";
139
125
  ERR["eofInElementThatCanContainOnlyText"] = "eof-in-element-that-can-contain-only-text";
140
- })(ERR = ERR || (ERR = {}));
126
+ })(ERR || (ERR = {}));
141
127
 
142
128
  //Const
143
129
  const DEFAULT_BUFFER_WATERLINE = 1 << 16;
@@ -170,22 +156,24 @@ class Preprocessor {
170
156
  get offset() {
171
157
  return this.droppedBufferSize + this.pos;
172
158
  }
173
- getError(code) {
159
+ getError(code, cpOffset) {
174
160
  const { line, col, offset } = this;
161
+ const startCol = col + cpOffset;
162
+ const startOffset = offset + cpOffset;
175
163
  return {
176
164
  code,
177
165
  startLine: line,
178
166
  endLine: line,
179
- startCol: col,
180
- endCol: col,
181
- startOffset: offset,
182
- endOffset: offset,
167
+ startCol,
168
+ endCol: startCol,
169
+ startOffset,
170
+ endOffset: startOffset,
183
171
  };
184
172
  }
185
173
  _err(code) {
186
174
  if (this.handler.onParseError && this.lastErrOffset !== this.offset) {
187
175
  this.lastErrOffset = this.offset;
188
- this.handler.onParseError(this.getError(code));
176
+ this.handler.onParseError(this.getError(code, 0));
189
177
  }
190
178
  }
191
179
  _addGap() {
@@ -343,7 +331,7 @@ var TokenType;
343
331
  TokenType[TokenType["DOCTYPE"] = 6] = "DOCTYPE";
344
332
  TokenType[TokenType["EOF"] = 7] = "EOF";
345
333
  TokenType[TokenType["HIBERNATION"] = 8] = "HIBERNATION";
346
- })(TokenType = TokenType || (TokenType = {}));
334
+ })(TokenType || (TokenType = {}));
347
335
  function getTokenAttr(token, attrName) {
348
336
  for (let i = token.attrs.length - 1; i >= 0; i--) {
349
337
  if (token.attrs[i].name === attrName) {
@@ -367,6 +355,51 @@ new Uint16Array(
367
355
  .split("")
368
356
  .map((c) => c.charCodeAt(0)));
369
357
 
358
+ // Adapted from https://github.com/mathiasbynens/he/blob/36afe179392226cf1b6ccdb16ebbb7a5a844d93a/src/he.js#L106-L134
359
+ const decodeMap = new Map([
360
+ [0, 65533],
361
+ // C1 Unicode control character reference replacements
362
+ [128, 8364],
363
+ [130, 8218],
364
+ [131, 402],
365
+ [132, 8222],
366
+ [133, 8230],
367
+ [134, 8224],
368
+ [135, 8225],
369
+ [136, 710],
370
+ [137, 8240],
371
+ [138, 352],
372
+ [139, 8249],
373
+ [140, 338],
374
+ [142, 381],
375
+ [145, 8216],
376
+ [146, 8217],
377
+ [147, 8220],
378
+ [148, 8221],
379
+ [149, 8226],
380
+ [150, 8211],
381
+ [151, 8212],
382
+ [152, 732],
383
+ [153, 8482],
384
+ [154, 353],
385
+ [155, 8250],
386
+ [156, 339],
387
+ [158, 382],
388
+ [159, 376],
389
+ ]);
390
+ /**
391
+ * Replace the given code point with a replacement character if it is a
392
+ * surrogate or is outside the valid range. Otherwise return the code
393
+ * point unchanged.
394
+ */
395
+ function replaceCodePoint(codePoint) {
396
+ var _a;
397
+ if ((codePoint >= 0xd800 && codePoint <= 0xdfff) || codePoint > 0x10ffff) {
398
+ return 0xfffd;
399
+ }
400
+ return (_a = decodeMap.get(codePoint)) !== null && _a !== void 0 ? _a : codePoint;
401
+ }
402
+
370
403
  var CharCodes;
371
404
  (function (CharCodes) {
372
405
  CharCodes[CharCodes["NUM"] = 35] = "NUM";
@@ -382,12 +415,35 @@ var CharCodes;
382
415
  CharCodes[CharCodes["UPPER_F"] = 70] = "UPPER_F";
383
416
  CharCodes[CharCodes["UPPER_Z"] = 90] = "UPPER_Z";
384
417
  })(CharCodes || (CharCodes = {}));
418
+ /** Bit that needs to be set to convert an upper case ASCII character to lower case */
419
+ const TO_LOWER_BIT = 0b100000;
385
420
  var BinTrieFlags;
386
421
  (function (BinTrieFlags) {
387
422
  BinTrieFlags[BinTrieFlags["VALUE_LENGTH"] = 49152] = "VALUE_LENGTH";
388
423
  BinTrieFlags[BinTrieFlags["BRANCH_LENGTH"] = 16256] = "BRANCH_LENGTH";
389
424
  BinTrieFlags[BinTrieFlags["JUMP_TABLE"] = 127] = "JUMP_TABLE";
390
425
  })(BinTrieFlags || (BinTrieFlags = {}));
426
+ function isNumber(code) {
427
+ return code >= CharCodes.ZERO && code <= CharCodes.NINE;
428
+ }
429
+ function isHexadecimalCharacter(code) {
430
+ return ((code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_F) ||
431
+ (code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_F));
432
+ }
433
+ function isAsciiAlphaNumeric$1(code) {
434
+ return ((code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_Z) ||
435
+ (code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_Z) ||
436
+ isNumber(code));
437
+ }
438
+ /**
439
+ * Checks if the given character is a valid end character for an entity in an attribute.
440
+ *
441
+ * Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error.
442
+ * See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
443
+ */
444
+ function isEntityInAttributeInvalidEnd(code) {
445
+ return code === CharCodes.EQUALS || isAsciiAlphaNumeric$1(code);
446
+ }
391
447
  var EntityDecoderState;
392
448
  (function (EntityDecoderState) {
393
449
  EntityDecoderState[EntityDecoderState["EntityStart"] = 0] = "EntityStart";
@@ -405,6 +461,320 @@ var DecodingMode;
405
461
  /** Entities in attributes have limitations on ending characters. */
406
462
  DecodingMode[DecodingMode["Attribute"] = 2] = "Attribute";
407
463
  })(DecodingMode || (DecodingMode = {}));
464
+ /**
465
+ * Token decoder with support of writing partial entities.
466
+ */
467
+ class EntityDecoder {
468
+ constructor(
469
+ /** The tree used to decode entities. */
470
+ decodeTree,
471
+ /**
472
+ * The function that is called when a codepoint is decoded.
473
+ *
474
+ * For multi-byte named entities, this will be called multiple times,
475
+ * with the second codepoint, and the same `consumed` value.
476
+ *
477
+ * @param codepoint The decoded codepoint.
478
+ * @param consumed The number of bytes consumed by the decoder.
479
+ */
480
+ emitCodePoint,
481
+ /** An object that is used to produce errors. */
482
+ errors) {
483
+ this.decodeTree = decodeTree;
484
+ this.emitCodePoint = emitCodePoint;
485
+ this.errors = errors;
486
+ /** The current state of the decoder. */
487
+ this.state = EntityDecoderState.EntityStart;
488
+ /** Characters that were consumed while parsing an entity. */
489
+ this.consumed = 1;
490
+ /**
491
+ * The result of the entity.
492
+ *
493
+ * Either the result index of a numeric entity, or the codepoint of a
494
+ * numeric entity.
495
+ */
496
+ this.result = 0;
497
+ /** The current index in the decode tree. */
498
+ this.treeIndex = 0;
499
+ /** The number of characters that were consumed in excess. */
500
+ this.excess = 1;
501
+ /** The mode in which the decoder is operating. */
502
+ this.decodeMode = DecodingMode.Strict;
503
+ }
504
+ /** Resets the instance to make it reusable. */
505
+ startEntity(decodeMode) {
506
+ this.decodeMode = decodeMode;
507
+ this.state = EntityDecoderState.EntityStart;
508
+ this.result = 0;
509
+ this.treeIndex = 0;
510
+ this.excess = 1;
511
+ this.consumed = 1;
512
+ }
513
+ /**
514
+ * Write an entity to the decoder. This can be called multiple times with partial entities.
515
+ * If the entity is incomplete, the decoder will return -1.
516
+ *
517
+ * Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
518
+ * entity is incomplete, and resume when the next string is written.
519
+ *
520
+ * @param string The string containing the entity (or a continuation of the entity).
521
+ * @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
522
+ * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
523
+ */
524
+ write(str, offset) {
525
+ switch (this.state) {
526
+ case EntityDecoderState.EntityStart: {
527
+ if (str.charCodeAt(offset) === CharCodes.NUM) {
528
+ this.state = EntityDecoderState.NumericStart;
529
+ this.consumed += 1;
530
+ return this.stateNumericStart(str, offset + 1);
531
+ }
532
+ this.state = EntityDecoderState.NamedEntity;
533
+ return this.stateNamedEntity(str, offset);
534
+ }
535
+ case EntityDecoderState.NumericStart: {
536
+ return this.stateNumericStart(str, offset);
537
+ }
538
+ case EntityDecoderState.NumericDecimal: {
539
+ return this.stateNumericDecimal(str, offset);
540
+ }
541
+ case EntityDecoderState.NumericHex: {
542
+ return this.stateNumericHex(str, offset);
543
+ }
544
+ case EntityDecoderState.NamedEntity: {
545
+ return this.stateNamedEntity(str, offset);
546
+ }
547
+ }
548
+ }
549
+ /**
550
+ * Switches between the numeric decimal and hexadecimal states.
551
+ *
552
+ * Equivalent to the `Numeric character reference state` in the HTML spec.
553
+ *
554
+ * @param str The string containing the entity (or a continuation of the entity).
555
+ * @param offset The current offset.
556
+ * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
557
+ */
558
+ stateNumericStart(str, offset) {
559
+ if (offset >= str.length) {
560
+ return -1;
561
+ }
562
+ if ((str.charCodeAt(offset) | TO_LOWER_BIT) === CharCodes.LOWER_X) {
563
+ this.state = EntityDecoderState.NumericHex;
564
+ this.consumed += 1;
565
+ return this.stateNumericHex(str, offset + 1);
566
+ }
567
+ this.state = EntityDecoderState.NumericDecimal;
568
+ return this.stateNumericDecimal(str, offset);
569
+ }
570
+ addToNumericResult(str, start, end, base) {
571
+ if (start !== end) {
572
+ const digitCount = end - start;
573
+ this.result =
574
+ this.result * Math.pow(base, digitCount) +
575
+ parseInt(str.substr(start, digitCount), base);
576
+ this.consumed += digitCount;
577
+ }
578
+ }
579
+ /**
580
+ * Parses a hexadecimal numeric entity.
581
+ *
582
+ * Equivalent to the `Hexademical character reference state` in the HTML spec.
583
+ *
584
+ * @param str The string containing the entity (or a continuation of the entity).
585
+ * @param offset The current offset.
586
+ * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
587
+ */
588
+ stateNumericHex(str, offset) {
589
+ const startIdx = offset;
590
+ while (offset < str.length) {
591
+ const char = str.charCodeAt(offset);
592
+ if (isNumber(char) || isHexadecimalCharacter(char)) {
593
+ offset += 1;
594
+ }
595
+ else {
596
+ this.addToNumericResult(str, startIdx, offset, 16);
597
+ return this.emitNumericEntity(char, 3);
598
+ }
599
+ }
600
+ this.addToNumericResult(str, startIdx, offset, 16);
601
+ return -1;
602
+ }
603
+ /**
604
+ * Parses a decimal numeric entity.
605
+ *
606
+ * Equivalent to the `Decimal character reference state` in the HTML spec.
607
+ *
608
+ * @param str The string containing the entity (or a continuation of the entity).
609
+ * @param offset The current offset.
610
+ * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
611
+ */
612
+ stateNumericDecimal(str, offset) {
613
+ const startIdx = offset;
614
+ while (offset < str.length) {
615
+ const char = str.charCodeAt(offset);
616
+ if (isNumber(char)) {
617
+ offset += 1;
618
+ }
619
+ else {
620
+ this.addToNumericResult(str, startIdx, offset, 10);
621
+ return this.emitNumericEntity(char, 2);
622
+ }
623
+ }
624
+ this.addToNumericResult(str, startIdx, offset, 10);
625
+ return -1;
626
+ }
627
+ /**
628
+ * Validate and emit a numeric entity.
629
+ *
630
+ * Implements the logic from the `Hexademical character reference start
631
+ * state` and `Numeric character reference end state` in the HTML spec.
632
+ *
633
+ * @param lastCp The last code point of the entity. Used to see if the
634
+ * entity was terminated with a semicolon.
635
+ * @param expectedLength The minimum number of characters that should be
636
+ * consumed. Used to validate that at least one digit
637
+ * was consumed.
638
+ * @returns The number of characters that were consumed.
639
+ */
640
+ emitNumericEntity(lastCp, expectedLength) {
641
+ var _a;
642
+ // Ensure we consumed at least one digit.
643
+ if (this.consumed <= expectedLength) {
644
+ (_a = this.errors) === null || _a === void 0 ? void 0 : _a.absenceOfDigitsInNumericCharacterReference(this.consumed);
645
+ return 0;
646
+ }
647
+ // Figure out if this is a legit end of the entity
648
+ if (lastCp === CharCodes.SEMI) {
649
+ this.consumed += 1;
650
+ }
651
+ else if (this.decodeMode === DecodingMode.Strict) {
652
+ return 0;
653
+ }
654
+ this.emitCodePoint(replaceCodePoint(this.result), this.consumed);
655
+ if (this.errors) {
656
+ if (lastCp !== CharCodes.SEMI) {
657
+ this.errors.missingSemicolonAfterCharacterReference();
658
+ }
659
+ this.errors.validateNumericCharacterReference(this.result);
660
+ }
661
+ return this.consumed;
662
+ }
663
+ /**
664
+ * Parses a named entity.
665
+ *
666
+ * Equivalent to the `Named character reference state` in the HTML spec.
667
+ *
668
+ * @param str The string containing the entity (or a continuation of the entity).
669
+ * @param offset The current offset.
670
+ * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
671
+ */
672
+ stateNamedEntity(str, offset) {
673
+ const { decodeTree } = this;
674
+ let current = decodeTree[this.treeIndex];
675
+ // The mask is the number of bytes of the value, including the current byte.
676
+ let valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
677
+ for (; offset < str.length; offset++, this.excess++) {
678
+ const char = str.charCodeAt(offset);
679
+ this.treeIndex = determineBranch(decodeTree, current, this.treeIndex + Math.max(1, valueLength), char);
680
+ if (this.treeIndex < 0) {
681
+ return this.result === 0 ||
682
+ // If we are parsing an attribute
683
+ (this.decodeMode === DecodingMode.Attribute &&
684
+ // We shouldn't have consumed any characters after the entity,
685
+ (valueLength === 0 ||
686
+ // And there should be no invalid characters.
687
+ isEntityInAttributeInvalidEnd(char)))
688
+ ? 0
689
+ : this.emitNotTerminatedNamedEntity();
690
+ }
691
+ current = decodeTree[this.treeIndex];
692
+ valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
693
+ // If the branch is a value, store it and continue
694
+ if (valueLength !== 0) {
695
+ // If the entity is terminated by a semicolon, we are done.
696
+ if (char === CharCodes.SEMI) {
697
+ return this.emitNamedEntityData(this.treeIndex, valueLength, this.consumed + this.excess);
698
+ }
699
+ // If we encounter a non-terminated (legacy) entity while parsing strictly, then ignore it.
700
+ if (this.decodeMode !== DecodingMode.Strict) {
701
+ this.result = this.treeIndex;
702
+ this.consumed += this.excess;
703
+ this.excess = 0;
704
+ }
705
+ }
706
+ }
707
+ return -1;
708
+ }
709
+ /**
710
+ * Emit a named entity that was not terminated with a semicolon.
711
+ *
712
+ * @returns The number of characters consumed.
713
+ */
714
+ emitNotTerminatedNamedEntity() {
715
+ var _a;
716
+ const { result, decodeTree } = this;
717
+ const valueLength = (decodeTree[result] & BinTrieFlags.VALUE_LENGTH) >> 14;
718
+ this.emitNamedEntityData(result, valueLength, this.consumed);
719
+ (_a = this.errors) === null || _a === void 0 ? void 0 : _a.missingSemicolonAfterCharacterReference();
720
+ return this.consumed;
721
+ }
722
+ /**
723
+ * Emit a named entity.
724
+ *
725
+ * @param result The index of the entity in the decode tree.
726
+ * @param valueLength The number of bytes in the entity.
727
+ * @param consumed The number of characters consumed.
728
+ *
729
+ * @returns The number of characters consumed.
730
+ */
731
+ emitNamedEntityData(result, valueLength, consumed) {
732
+ const { decodeTree } = this;
733
+ this.emitCodePoint(valueLength === 1
734
+ ? decodeTree[result] & ~BinTrieFlags.VALUE_LENGTH
735
+ : decodeTree[result + 1], consumed);
736
+ if (valueLength === 3) {
737
+ // For multi-byte values, we need to emit the second byte.
738
+ this.emitCodePoint(decodeTree[result + 2], consumed);
739
+ }
740
+ return consumed;
741
+ }
742
+ /**
743
+ * Signal to the parser that the end of the input was reached.
744
+ *
745
+ * Remaining data will be emitted and relevant errors will be produced.
746
+ *
747
+ * @returns The number of characters consumed.
748
+ */
749
+ end() {
750
+ var _a;
751
+ switch (this.state) {
752
+ case EntityDecoderState.NamedEntity: {
753
+ // Emit a named entity if we have one.
754
+ return this.result !== 0 &&
755
+ (this.decodeMode !== DecodingMode.Attribute ||
756
+ this.result === this.treeIndex)
757
+ ? this.emitNotTerminatedNamedEntity()
758
+ : 0;
759
+ }
760
+ // Otherwise, emit a numeric entity if we have one.
761
+ case EntityDecoderState.NumericDecimal: {
762
+ return this.emitNumericEntity(0, 2);
763
+ }
764
+ case EntityDecoderState.NumericHex: {
765
+ return this.emitNumericEntity(0, 3);
766
+ }
767
+ case EntityDecoderState.NumericStart: {
768
+ (_a = this.errors) === null || _a === void 0 ? void 0 : _a.absenceOfDigitsInNumericCharacterReference(this.consumed);
769
+ return 0;
770
+ }
771
+ case EntityDecoderState.EntityStart: {
772
+ // Return 0 if we have no entity.
773
+ return 0;
774
+ }
775
+ }
776
+ }
777
+ }
408
778
  /**
409
779
  * Determines the branch of the current node that is taken given the current
410
780
  * character. This function is used to traverse the trie.
@@ -458,7 +828,7 @@ var NS;
458
828
  NS["XLINK"] = "http://www.w3.org/1999/xlink";
459
829
  NS["XML"] = "http://www.w3.org/XML/1998/namespace";
460
830
  NS["XMLNS"] = "http://www.w3.org/2000/xmlns/";
461
- })(NS = NS || (NS = {}));
831
+ })(NS || (NS = {}));
462
832
  var ATTRS;
463
833
  (function (ATTRS) {
464
834
  ATTRS["TYPE"] = "type";
@@ -469,7 +839,7 @@ var ATTRS;
469
839
  ATTRS["COLOR"] = "color";
470
840
  ATTRS["FACE"] = "face";
471
841
  ATTRS["SIZE"] = "size";
472
- })(ATTRS = ATTRS || (ATTRS = {}));
842
+ })(ATTRS || (ATTRS = {}));
473
843
  /**
474
844
  * The mode of the document.
475
845
  *
@@ -480,7 +850,7 @@ var DOCUMENT_MODE;
480
850
  DOCUMENT_MODE["NO_QUIRKS"] = "no-quirks";
481
851
  DOCUMENT_MODE["QUIRKS"] = "quirks";
482
852
  DOCUMENT_MODE["LIMITED_QUIRKS"] = "limited-quirks";
483
- })(DOCUMENT_MODE = DOCUMENT_MODE || (DOCUMENT_MODE = {}));
853
+ })(DOCUMENT_MODE || (DOCUMENT_MODE = {}));
484
854
  var TAG_NAMES;
485
855
  (function (TAG_NAMES) {
486
856
  TAG_NAMES["A"] = "a";
@@ -576,6 +946,7 @@ var TAG_NAMES;
576
946
  TAG_NAMES["RUBY"] = "ruby";
577
947
  TAG_NAMES["S"] = "s";
578
948
  TAG_NAMES["SCRIPT"] = "script";
949
+ TAG_NAMES["SEARCH"] = "search";
579
950
  TAG_NAMES["SECTION"] = "section";
580
951
  TAG_NAMES["SELECT"] = "select";
581
952
  TAG_NAMES["SOURCE"] = "source";
@@ -605,7 +976,7 @@ var TAG_NAMES;
605
976
  TAG_NAMES["VAR"] = "var";
606
977
  TAG_NAMES["WBR"] = "wbr";
607
978
  TAG_NAMES["XMP"] = "xmp";
608
- })(TAG_NAMES = TAG_NAMES || (TAG_NAMES = {}));
979
+ })(TAG_NAMES || (TAG_NAMES = {}));
609
980
  /**
610
981
  * Tag IDs are numeric IDs for known tag names.
611
982
  *
@@ -707,36 +1078,37 @@ var TAG_ID;
707
1078
  TAG_ID[TAG_ID["RUBY"] = 91] = "RUBY";
708
1079
  TAG_ID[TAG_ID["S"] = 92] = "S";
709
1080
  TAG_ID[TAG_ID["SCRIPT"] = 93] = "SCRIPT";
710
- TAG_ID[TAG_ID["SECTION"] = 94] = "SECTION";
711
- TAG_ID[TAG_ID["SELECT"] = 95] = "SELECT";
712
- TAG_ID[TAG_ID["SOURCE"] = 96] = "SOURCE";
713
- TAG_ID[TAG_ID["SMALL"] = 97] = "SMALL";
714
- TAG_ID[TAG_ID["SPAN"] = 98] = "SPAN";
715
- TAG_ID[TAG_ID["STRIKE"] = 99] = "STRIKE";
716
- TAG_ID[TAG_ID["STRONG"] = 100] = "STRONG";
717
- TAG_ID[TAG_ID["STYLE"] = 101] = "STYLE";
718
- TAG_ID[TAG_ID["SUB"] = 102] = "SUB";
719
- TAG_ID[TAG_ID["SUMMARY"] = 103] = "SUMMARY";
720
- TAG_ID[TAG_ID["SUP"] = 104] = "SUP";
721
- TAG_ID[TAG_ID["TABLE"] = 105] = "TABLE";
722
- TAG_ID[TAG_ID["TBODY"] = 106] = "TBODY";
723
- TAG_ID[TAG_ID["TEMPLATE"] = 107] = "TEMPLATE";
724
- TAG_ID[TAG_ID["TEXTAREA"] = 108] = "TEXTAREA";
725
- TAG_ID[TAG_ID["TFOOT"] = 109] = "TFOOT";
726
- TAG_ID[TAG_ID["TD"] = 110] = "TD";
727
- TAG_ID[TAG_ID["TH"] = 111] = "TH";
728
- TAG_ID[TAG_ID["THEAD"] = 112] = "THEAD";
729
- TAG_ID[TAG_ID["TITLE"] = 113] = "TITLE";
730
- TAG_ID[TAG_ID["TR"] = 114] = "TR";
731
- TAG_ID[TAG_ID["TRACK"] = 115] = "TRACK";
732
- TAG_ID[TAG_ID["TT"] = 116] = "TT";
733
- TAG_ID[TAG_ID["U"] = 117] = "U";
734
- TAG_ID[TAG_ID["UL"] = 118] = "UL";
735
- TAG_ID[TAG_ID["SVG"] = 119] = "SVG";
736
- TAG_ID[TAG_ID["VAR"] = 120] = "VAR";
737
- TAG_ID[TAG_ID["WBR"] = 121] = "WBR";
738
- TAG_ID[TAG_ID["XMP"] = 122] = "XMP";
739
- })(TAG_ID = TAG_ID || (TAG_ID = {}));
1081
+ TAG_ID[TAG_ID["SEARCH"] = 94] = "SEARCH";
1082
+ TAG_ID[TAG_ID["SECTION"] = 95] = "SECTION";
1083
+ TAG_ID[TAG_ID["SELECT"] = 96] = "SELECT";
1084
+ TAG_ID[TAG_ID["SOURCE"] = 97] = "SOURCE";
1085
+ TAG_ID[TAG_ID["SMALL"] = 98] = "SMALL";
1086
+ TAG_ID[TAG_ID["SPAN"] = 99] = "SPAN";
1087
+ TAG_ID[TAG_ID["STRIKE"] = 100] = "STRIKE";
1088
+ TAG_ID[TAG_ID["STRONG"] = 101] = "STRONG";
1089
+ TAG_ID[TAG_ID["STYLE"] = 102] = "STYLE";
1090
+ TAG_ID[TAG_ID["SUB"] = 103] = "SUB";
1091
+ TAG_ID[TAG_ID["SUMMARY"] = 104] = "SUMMARY";
1092
+ TAG_ID[TAG_ID["SUP"] = 105] = "SUP";
1093
+ TAG_ID[TAG_ID["TABLE"] = 106] = "TABLE";
1094
+ TAG_ID[TAG_ID["TBODY"] = 107] = "TBODY";
1095
+ TAG_ID[TAG_ID["TEMPLATE"] = 108] = "TEMPLATE";
1096
+ TAG_ID[TAG_ID["TEXTAREA"] = 109] = "TEXTAREA";
1097
+ TAG_ID[TAG_ID["TFOOT"] = 110] = "TFOOT";
1098
+ TAG_ID[TAG_ID["TD"] = 111] = "TD";
1099
+ TAG_ID[TAG_ID["TH"] = 112] = "TH";
1100
+ TAG_ID[TAG_ID["THEAD"] = 113] = "THEAD";
1101
+ TAG_ID[TAG_ID["TITLE"] = 114] = "TITLE";
1102
+ TAG_ID[TAG_ID["TR"] = 115] = "TR";
1103
+ TAG_ID[TAG_ID["TRACK"] = 116] = "TRACK";
1104
+ TAG_ID[TAG_ID["TT"] = 117] = "TT";
1105
+ TAG_ID[TAG_ID["U"] = 118] = "U";
1106
+ TAG_ID[TAG_ID["UL"] = 119] = "UL";
1107
+ TAG_ID[TAG_ID["SVG"] = 120] = "SVG";
1108
+ TAG_ID[TAG_ID["VAR"] = 121] = "VAR";
1109
+ TAG_ID[TAG_ID["WBR"] = 122] = "WBR";
1110
+ TAG_ID[TAG_ID["XMP"] = 123] = "XMP";
1111
+ })(TAG_ID || (TAG_ID = {}));
740
1112
  const TAG_NAME_TO_ID = new Map([
741
1113
  [TAG_NAMES.A, TAG_ID.A],
742
1114
  [TAG_NAMES.ADDRESS, TAG_ID.ADDRESS],
@@ -831,6 +1203,7 @@ const TAG_NAME_TO_ID = new Map([
831
1203
  [TAG_NAMES.RUBY, TAG_ID.RUBY],
832
1204
  [TAG_NAMES.S, TAG_ID.S],
833
1205
  [TAG_NAMES.SCRIPT, TAG_ID.SCRIPT],
1206
+ [TAG_NAMES.SEARCH, TAG_ID.SEARCH],
834
1207
  [TAG_NAMES.SECTION, TAG_ID.SECTION],
835
1208
  [TAG_NAMES.SELECT, TAG_ID.SELECT],
836
1209
  [TAG_NAMES.SOURCE, TAG_ID.SOURCE],
@@ -956,40 +1329,8 @@ const SPECIAL_ELEMENTS = {
956
1329
  [NS.XML]: new Set(),
957
1330
  [NS.XMLNS]: new Set(),
958
1331
  };
959
- function isNumberedHeader(tn) {
960
- return tn === $.H1 || tn === $.H2 || tn === $.H3 || tn === $.H4 || tn === $.H5 || tn === $.H6;
961
- }
1332
+ const NUMBERED_HEADERS = new Set([$.H1, $.H2, $.H3, $.H4, $.H5, $.H6]);
962
1333
 
963
- //C1 Unicode control character reference replacements
964
- const C1_CONTROLS_REFERENCE_REPLACEMENTS = new Map([
965
- [0x80, 8364],
966
- [0x82, 8218],
967
- [0x83, 402],
968
- [0x84, 8222],
969
- [0x85, 8230],
970
- [0x86, 8224],
971
- [0x87, 8225],
972
- [0x88, 710],
973
- [0x89, 8240],
974
- [0x8a, 352],
975
- [0x8b, 8249],
976
- [0x8c, 338],
977
- [0x8e, 381],
978
- [0x91, 8216],
979
- [0x92, 8217],
980
- [0x93, 8220],
981
- [0x94, 8221],
982
- [0x95, 8226],
983
- [0x96, 8211],
984
- [0x97, 8212],
985
- [0x98, 732],
986
- [0x99, 8482],
987
- [0x9a, 353],
988
- [0x9b, 8250],
989
- [0x9c, 339],
990
- [0x9e, 382],
991
- [0x9f, 376],
992
- ]);
993
1334
  //States
994
1335
  var State;
995
1336
  (function (State) {
@@ -1065,13 +1406,7 @@ var State;
1065
1406
  State[State["CDATA_SECTION_BRACKET"] = 69] = "CDATA_SECTION_BRACKET";
1066
1407
  State[State["CDATA_SECTION_END"] = 70] = "CDATA_SECTION_END";
1067
1408
  State[State["CHARACTER_REFERENCE"] = 71] = "CHARACTER_REFERENCE";
1068
- State[State["NAMED_CHARACTER_REFERENCE"] = 72] = "NAMED_CHARACTER_REFERENCE";
1069
- State[State["AMBIGUOUS_AMPERSAND"] = 73] = "AMBIGUOUS_AMPERSAND";
1070
- State[State["NUMERIC_CHARACTER_REFERENCE"] = 74] = "NUMERIC_CHARACTER_REFERENCE";
1071
- State[State["HEXADEMICAL_CHARACTER_REFERENCE_START"] = 75] = "HEXADEMICAL_CHARACTER_REFERENCE_START";
1072
- State[State["HEXADEMICAL_CHARACTER_REFERENCE"] = 76] = "HEXADEMICAL_CHARACTER_REFERENCE";
1073
- State[State["DECIMAL_CHARACTER_REFERENCE"] = 77] = "DECIMAL_CHARACTER_REFERENCE";
1074
- State[State["NUMERIC_CHARACTER_REFERENCE_END"] = 78] = "NUMERIC_CHARACTER_REFERENCE_END";
1409
+ State[State["AMBIGUOUS_AMPERSAND"] = 72] = "AMBIGUOUS_AMPERSAND";
1075
1410
  })(State || (State = {}));
1076
1411
  //Tokenizer initial states for different modes
1077
1412
  const TokenizerMode = {
@@ -1101,27 +1436,33 @@ function isAsciiLetter(cp) {
1101
1436
  function isAsciiAlphaNumeric(cp) {
1102
1437
  return isAsciiLetter(cp) || isAsciiDigit(cp);
1103
1438
  }
1104
- function isAsciiUpperHexDigit(cp) {
1105
- return cp >= CODE_POINTS.LATIN_CAPITAL_A && cp <= CODE_POINTS.LATIN_CAPITAL_F;
1106
- }
1107
- function isAsciiLowerHexDigit(cp) {
1108
- return cp >= CODE_POINTS.LATIN_SMALL_A && cp <= CODE_POINTS.LATIN_SMALL_F;
1109
- }
1110
- function isAsciiHexDigit(cp) {
1111
- return isAsciiDigit(cp) || isAsciiUpperHexDigit(cp) || isAsciiLowerHexDigit(cp);
1112
- }
1113
1439
  function toAsciiLower(cp) {
1114
1440
  return cp + 32;
1115
1441
  }
1116
1442
  function isWhitespace(cp) {
1117
1443
  return cp === CODE_POINTS.SPACE || cp === CODE_POINTS.LINE_FEED || cp === CODE_POINTS.TABULATION || cp === CODE_POINTS.FORM_FEED;
1118
1444
  }
1119
- function isEntityInAttributeInvalidEnd(nextCp) {
1120
- return nextCp === CODE_POINTS.EQUALS_SIGN || isAsciiAlphaNumeric(nextCp);
1121
- }
1122
1445
  function isScriptDataDoubleEscapeSequenceEnd(cp) {
1123
1446
  return isWhitespace(cp) || cp === CODE_POINTS.SOLIDUS || cp === CODE_POINTS.GREATER_THAN_SIGN;
1124
1447
  }
1448
+ function getErrorForNumericCharacterReference(code) {
1449
+ if (code === CODE_POINTS.NULL) {
1450
+ return ERR.nullCharacterReference;
1451
+ }
1452
+ else if (code > 1114111) {
1453
+ return ERR.characterReferenceOutsideUnicodeRange;
1454
+ }
1455
+ else if (isSurrogate(code)) {
1456
+ return ERR.surrogateCharacterReference;
1457
+ }
1458
+ else if (isUndefinedCodePoint(code)) {
1459
+ return ERR.noncharacterCharacterReference;
1460
+ }
1461
+ else if (isControlCodePoint(code) || code === CODE_POINTS.CARRIAGE_RETURN) {
1462
+ return ERR.controlCharacterReference;
1463
+ }
1464
+ return null;
1465
+ }
1125
1466
  //Tokenizer
1126
1467
  class Tokenizer {
1127
1468
  constructor(options, handler) {
@@ -1141,18 +1482,38 @@ class Tokenizer {
1141
1482
  this.active = false;
1142
1483
  this.state = State.DATA;
1143
1484
  this.returnState = State.DATA;
1144
- this.charRefCode = -1;
1485
+ this.entityStartPos = 0;
1145
1486
  this.consumedAfterSnapshot = -1;
1146
1487
  this.currentCharacterToken = null;
1147
1488
  this.currentToken = null;
1148
1489
  this.currentAttr = { name: '', value: '' };
1149
1490
  this.preprocessor = new Preprocessor(handler);
1150
1491
  this.currentLocation = this.getCurrentLocation(-1);
1492
+ this.entityDecoder = new EntityDecoder(htmlDecodeTree, (cp, consumed) => {
1493
+ // Note: Set `pos` _before_ flushing, as flushing might drop
1494
+ // the current chunk and invalidate `entityStartPos`.
1495
+ this.preprocessor.pos = this.entityStartPos + consumed - 1;
1496
+ this._flushCodePointConsumedAsCharacterReference(cp);
1497
+ }, handler.onParseError
1498
+ ? {
1499
+ missingSemicolonAfterCharacterReference: () => {
1500
+ this._err(ERR.missingSemicolonAfterCharacterReference, 1);
1501
+ },
1502
+ absenceOfDigitsInNumericCharacterReference: (consumed) => {
1503
+ this._err(ERR.absenceOfDigitsInNumericCharacterReference, this.entityStartPos - this.preprocessor.pos + consumed);
1504
+ },
1505
+ validateNumericCharacterReference: (code) => {
1506
+ const error = getErrorForNumericCharacterReference(code);
1507
+ if (error)
1508
+ this._err(error, 1);
1509
+ },
1510
+ }
1511
+ : undefined);
1151
1512
  }
1152
1513
  //Errors
1153
- _err(code) {
1514
+ _err(code, cpOffset = 0) {
1154
1515
  var _a, _b;
1155
- (_b = (_a = this.handler).onParseError) === null || _b === void 0 ? void 0 : _b.call(_a, this.preprocessor.getError(code));
1516
+ (_b = (_a = this.handler).onParseError) === null || _b === void 0 ? void 0 : _b.call(_a, this.preprocessor.getError(code, cpOffset));
1156
1517
  }
1157
1518
  // NOTE: `offset` may never run across line boundaries.
1158
1519
  getCurrentLocation(offset) {
@@ -1214,7 +1575,8 @@ class Tokenizer {
1214
1575
  //Hibernation
1215
1576
  _ensureHibernation() {
1216
1577
  if (this.preprocessor.endOfChunkHit) {
1217
- this._unconsume(this.consumedAfterSnapshot);
1578
+ this.preprocessor.retreat(this.consumedAfterSnapshot);
1579
+ this.consumedAfterSnapshot = 0;
1218
1580
  this.active = false;
1219
1581
  return true;
1220
1582
  }
@@ -1225,14 +1587,6 @@ class Tokenizer {
1225
1587
  this.consumedAfterSnapshot++;
1226
1588
  return this.preprocessor.advance();
1227
1589
  }
1228
- _unconsume(count) {
1229
- this.consumedAfterSnapshot -= count;
1230
- this.preprocessor.retreat(count);
1231
- }
1232
- _reconsumeInState(state, cp) {
1233
- this.state = state;
1234
- this._callState(cp);
1235
- }
1236
1590
  _advanceBy(count) {
1237
1591
  this.consumedAfterSnapshot += count;
1238
1592
  for (let i = 0; i < count; i++) {
@@ -1404,7 +1758,7 @@ class Tokenizer {
1404
1758
  this.active = false;
1405
1759
  }
1406
1760
  //Characters emission
1407
- //OPTIMIZATION: specification uses only one type of character tokens (one token per character).
1761
+ //OPTIMIZATION: The specification uses only one type of character token (one token per character).
1408
1762
  //This causes a huge memory overhead and a lot of unnecessary parser loops. parse5 uses 3 groups of characters.
1409
1763
  //If we have a sequence of characters that belong to the same group, the parser can process it
1410
1764
  //as a single solid character token.
@@ -1414,15 +1768,15 @@ class Tokenizer {
1414
1768
  //3)TokenType.CHARACTER - any character sequence which don't belong to groups 1 and 2 (e.g. 'abcdef1234@@#$%^')
1415
1769
  _appendCharToCurrentCharacterToken(type, ch) {
1416
1770
  if (this.currentCharacterToken) {
1417
- if (this.currentCharacterToken.type !== type) {
1771
+ if (this.currentCharacterToken.type === type) {
1772
+ this.currentCharacterToken.chars += ch;
1773
+ return;
1774
+ }
1775
+ else {
1418
1776
  this.currentLocation = this.getCurrentLocation(0);
1419
1777
  this._emitCurrentCharacterToken(this.currentLocation);
1420
1778
  this.preprocessor.dropParsedChunk();
1421
1779
  }
1422
- else {
1423
- this.currentCharacterToken.chars += ch;
1424
- return;
1425
- }
1426
1780
  }
1427
1781
  this._createCharacterToken(type, ch);
1428
1782
  }
@@ -1440,59 +1794,11 @@ class Tokenizer {
1440
1794
  this._appendCharToCurrentCharacterToken(TokenType.CHARACTER, ch);
1441
1795
  }
1442
1796
  // Character reference helpers
1443
- _matchNamedCharacterReference(cp) {
1444
- let result = null;
1445
- let excess = 0;
1446
- let withoutSemicolon = false;
1447
- for (let i = 0, current = htmlDecodeTree[0]; i >= 0; cp = this._consume()) {
1448
- i = determineBranch(htmlDecodeTree, current, i + 1, cp);
1449
- if (i < 0)
1450
- break;
1451
- excess += 1;
1452
- current = htmlDecodeTree[i];
1453
- const masked = current & BinTrieFlags.VALUE_LENGTH;
1454
- // If the branch is a value, store it and continue
1455
- if (masked) {
1456
- // The mask is the number of bytes of the value, including the current byte.
1457
- const valueLength = (masked >> 14) - 1;
1458
- // Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error.
1459
- // See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
1460
- if (cp !== CODE_POINTS.SEMICOLON &&
1461
- this._isCharacterReferenceInAttribute() &&
1462
- isEntityInAttributeInvalidEnd(this.preprocessor.peek(1))) {
1463
- //NOTE: we don't flush all consumed code points here, and instead switch back to the original state after
1464
- //emitting an ampersand. This is fine, as alphanumeric characters won't be parsed differently in attributes.
1465
- result = [CODE_POINTS.AMPERSAND];
1466
- // Skip over the value.
1467
- i += valueLength;
1468
- }
1469
- else {
1470
- // If this is a surrogate pair, consume the next two bytes.
1471
- result =
1472
- valueLength === 0
1473
- ? [htmlDecodeTree[i] & ~BinTrieFlags.VALUE_LENGTH]
1474
- : valueLength === 1
1475
- ? [htmlDecodeTree[++i]]
1476
- : [htmlDecodeTree[++i], htmlDecodeTree[++i]];
1477
- excess = 0;
1478
- withoutSemicolon = cp !== CODE_POINTS.SEMICOLON;
1479
- }
1480
- if (valueLength === 0) {
1481
- // If the value is zero-length, we're done.
1482
- this._consume();
1483
- break;
1484
- }
1485
- }
1486
- }
1487
- this._unconsume(excess);
1488
- if (withoutSemicolon && !this.preprocessor.endOfChunkHit) {
1489
- this._err(ERR.missingSemicolonAfterCharacterReference);
1490
- }
1491
- // We want to emit the error above on the code point after the entity.
1492
- // We always consume one code point too many in the loop, and we wait to
1493
- // unconsume it until after the error is emitted.
1494
- this._unconsume(1);
1495
- return result;
1797
+ _startCharacterReference() {
1798
+ this.returnState = this.state;
1799
+ this.state = State.CHARACTER_REFERENCE;
1800
+ this.entityStartPos = this.preprocessor.pos;
1801
+ this.entityDecoder.startEntity(this._isCharacterReferenceInAttribute() ? DecodingMode.Attribute : DecodingMode.Legacy);
1496
1802
  }
1497
1803
  _isCharacterReferenceInAttribute() {
1498
1804
  return (this.returnState === State.ATTRIBUTE_VALUE_DOUBLE_QUOTED ||
@@ -1795,37 +2101,13 @@ class Tokenizer {
1795
2101
  break;
1796
2102
  }
1797
2103
  case State.CHARACTER_REFERENCE: {
1798
- this._stateCharacterReference(cp);
1799
- break;
1800
- }
1801
- case State.NAMED_CHARACTER_REFERENCE: {
1802
- this._stateNamedCharacterReference(cp);
2104
+ this._stateCharacterReference();
1803
2105
  break;
1804
2106
  }
1805
2107
  case State.AMBIGUOUS_AMPERSAND: {
1806
2108
  this._stateAmbiguousAmpersand(cp);
1807
2109
  break;
1808
2110
  }
1809
- case State.NUMERIC_CHARACTER_REFERENCE: {
1810
- this._stateNumericCharacterReference(cp);
1811
- break;
1812
- }
1813
- case State.HEXADEMICAL_CHARACTER_REFERENCE_START: {
1814
- this._stateHexademicalCharacterReferenceStart(cp);
1815
- break;
1816
- }
1817
- case State.HEXADEMICAL_CHARACTER_REFERENCE: {
1818
- this._stateHexademicalCharacterReference(cp);
1819
- break;
1820
- }
1821
- case State.DECIMAL_CHARACTER_REFERENCE: {
1822
- this._stateDecimalCharacterReference(cp);
1823
- break;
1824
- }
1825
- case State.NUMERIC_CHARACTER_REFERENCE_END: {
1826
- this._stateNumericCharacterReferenceEnd(cp);
1827
- break;
1828
- }
1829
2111
  default: {
1830
2112
  throw new Error('Unknown state');
1831
2113
  }
@@ -1841,8 +2123,7 @@ class Tokenizer {
1841
2123
  break;
1842
2124
  }
1843
2125
  case CODE_POINTS.AMPERSAND: {
1844
- this.returnState = State.DATA;
1845
- this.state = State.CHARACTER_REFERENCE;
2126
+ this._startCharacterReference();
1846
2127
  break;
1847
2128
  }
1848
2129
  case CODE_POINTS.NULL: {
@@ -1864,8 +2145,7 @@ class Tokenizer {
1864
2145
  _stateRcdata(cp) {
1865
2146
  switch (cp) {
1866
2147
  case CODE_POINTS.AMPERSAND: {
1867
- this.returnState = State.RCDATA;
1868
- this.state = State.CHARACTER_REFERENCE;
2148
+ this._startCharacterReference();
1869
2149
  break;
1870
2150
  }
1871
2151
  case CODE_POINTS.LESS_THAN_SIGN: {
@@ -2634,8 +2914,7 @@ class Tokenizer {
2634
2914
  break;
2635
2915
  }
2636
2916
  case CODE_POINTS.AMPERSAND: {
2637
- this.returnState = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
2638
- this.state = State.CHARACTER_REFERENCE;
2917
+ this._startCharacterReference();
2639
2918
  break;
2640
2919
  }
2641
2920
  case CODE_POINTS.NULL: {
@@ -2662,8 +2941,7 @@ class Tokenizer {
2662
2941
  break;
2663
2942
  }
2664
2943
  case CODE_POINTS.AMPERSAND: {
2665
- this.returnState = State.ATTRIBUTE_VALUE_SINGLE_QUOTED;
2666
- this.state = State.CHARACTER_REFERENCE;
2944
+ this._startCharacterReference();
2667
2945
  break;
2668
2946
  }
2669
2947
  case CODE_POINTS.NULL: {
@@ -2694,8 +2972,7 @@ class Tokenizer {
2694
2972
  break;
2695
2973
  }
2696
2974
  case CODE_POINTS.AMPERSAND: {
2697
- this.returnState = State.ATTRIBUTE_VALUE_UNQUOTED;
2698
- this.state = State.CHARACTER_REFERENCE;
2975
+ this._startCharacterReference();
2699
2976
  break;
2700
2977
  }
2701
2978
  case CODE_POINTS.GREATER_THAN_SIGN: {
@@ -3711,35 +3988,35 @@ class Tokenizer {
3711
3988
  }
3712
3989
  // Character reference state
3713
3990
  //------------------------------------------------------------------
3714
- _stateCharacterReference(cp) {
3715
- if (cp === CODE_POINTS.NUMBER_SIGN) {
3716
- this.state = State.NUMERIC_CHARACTER_REFERENCE;
3717
- }
3718
- else if (isAsciiAlphaNumeric(cp)) {
3719
- this.state = State.NAMED_CHARACTER_REFERENCE;
3720
- this._stateNamedCharacterReference(cp);
3991
+ _stateCharacterReference() {
3992
+ let length = this.entityDecoder.write(this.preprocessor.html, this.preprocessor.pos);
3993
+ if (length < 0) {
3994
+ if (this.preprocessor.lastChunkWritten) {
3995
+ length = this.entityDecoder.end();
3996
+ }
3997
+ else {
3998
+ // Wait for the rest of the entity.
3999
+ this.active = false;
4000
+ // Mark the entire buffer as read.
4001
+ this.preprocessor.pos = this.preprocessor.html.length - 1;
4002
+ this.consumedAfterSnapshot = 0;
4003
+ this.preprocessor.endOfChunkHit = true;
4004
+ return;
4005
+ }
3721
4006
  }
3722
- else {
4007
+ if (length === 0) {
4008
+ // This was not a valid entity. Go back to the beginning, and
4009
+ // figure out what to do.
4010
+ this.preprocessor.pos = this.entityStartPos;
3723
4011
  this._flushCodePointConsumedAsCharacterReference(CODE_POINTS.AMPERSAND);
3724
- this._reconsumeInState(this.returnState, cp);
3725
- }
3726
- }
3727
- // Named character reference state
3728
- //------------------------------------------------------------------
3729
- _stateNamedCharacterReference(cp) {
3730
- const matchResult = this._matchNamedCharacterReference(cp);
3731
- //NOTE: Matching can be abrupted by hibernation. In that case, match
3732
- //results are no longer valid and we will need to start over.
3733
- if (this._ensureHibernation()) ;
3734
- else if (matchResult) {
3735
- for (let i = 0; i < matchResult.length; i++) {
3736
- this._flushCodePointConsumedAsCharacterReference(matchResult[i]);
3737
- }
3738
- this.state = this.returnState;
4012
+ this.state =
4013
+ !this._isCharacterReferenceInAttribute() && isAsciiAlphaNumeric(this.preprocessor.peek(1))
4014
+ ? State.AMBIGUOUS_AMPERSAND
4015
+ : this.returnState;
3739
4016
  }
3740
4017
  else {
3741
- this._flushCodePointConsumedAsCharacterReference(CODE_POINTS.AMPERSAND);
3742
- this.state = State.AMBIGUOUS_AMPERSAND;
4018
+ // We successfully parsed an entity. Switch to the return state.
4019
+ this.state = this.returnState;
3743
4020
  }
3744
4021
  }
3745
4022
  // Ambiguos ampersand state
@@ -3752,107 +4029,10 @@ class Tokenizer {
3752
4029
  if (cp === CODE_POINTS.SEMICOLON) {
3753
4030
  this._err(ERR.unknownNamedCharacterReference);
3754
4031
  }
3755
- this._reconsumeInState(this.returnState, cp);
3756
- }
3757
- }
3758
- // Numeric character reference state
3759
- //------------------------------------------------------------------
3760
- _stateNumericCharacterReference(cp) {
3761
- this.charRefCode = 0;
3762
- if (cp === CODE_POINTS.LATIN_SMALL_X || cp === CODE_POINTS.LATIN_CAPITAL_X) {
3763
- this.state = State.HEXADEMICAL_CHARACTER_REFERENCE_START;
3764
- }
3765
- // Inlined decimal character reference start state
3766
- else if (isAsciiDigit(cp)) {
3767
- this.state = State.DECIMAL_CHARACTER_REFERENCE;
3768
- this._stateDecimalCharacterReference(cp);
3769
- }
3770
- else {
3771
- this._err(ERR.absenceOfDigitsInNumericCharacterReference);
3772
- this._flushCodePointConsumedAsCharacterReference(CODE_POINTS.AMPERSAND);
3773
- this._flushCodePointConsumedAsCharacterReference(CODE_POINTS.NUMBER_SIGN);
3774
- this._reconsumeInState(this.returnState, cp);
3775
- }
3776
- }
3777
- // Hexademical character reference start state
3778
- //------------------------------------------------------------------
3779
- _stateHexademicalCharacterReferenceStart(cp) {
3780
- if (isAsciiHexDigit(cp)) {
3781
- this.state = State.HEXADEMICAL_CHARACTER_REFERENCE;
3782
- this._stateHexademicalCharacterReference(cp);
3783
- }
3784
- else {
3785
- this._err(ERR.absenceOfDigitsInNumericCharacterReference);
3786
- this._flushCodePointConsumedAsCharacterReference(CODE_POINTS.AMPERSAND);
3787
- this._flushCodePointConsumedAsCharacterReference(CODE_POINTS.NUMBER_SIGN);
3788
- this._unconsume(2);
3789
4032
  this.state = this.returnState;
4033
+ this._callState(cp);
3790
4034
  }
3791
4035
  }
3792
- // Hexademical character reference state
3793
- //------------------------------------------------------------------
3794
- _stateHexademicalCharacterReference(cp) {
3795
- if (isAsciiUpperHexDigit(cp)) {
3796
- this.charRefCode = this.charRefCode * 16 + cp - 0x37;
3797
- }
3798
- else if (isAsciiLowerHexDigit(cp)) {
3799
- this.charRefCode = this.charRefCode * 16 + cp - 0x57;
3800
- }
3801
- else if (isAsciiDigit(cp)) {
3802
- this.charRefCode = this.charRefCode * 16 + cp - 0x30;
3803
- }
3804
- else if (cp === CODE_POINTS.SEMICOLON) {
3805
- this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
3806
- }
3807
- else {
3808
- this._err(ERR.missingSemicolonAfterCharacterReference);
3809
- this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
3810
- this._stateNumericCharacterReferenceEnd(cp);
3811
- }
3812
- }
3813
- // Decimal character reference state
3814
- //------------------------------------------------------------------
3815
- _stateDecimalCharacterReference(cp) {
3816
- if (isAsciiDigit(cp)) {
3817
- this.charRefCode = this.charRefCode * 10 + cp - 0x30;
3818
- }
3819
- else if (cp === CODE_POINTS.SEMICOLON) {
3820
- this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
3821
- }
3822
- else {
3823
- this._err(ERR.missingSemicolonAfterCharacterReference);
3824
- this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
3825
- this._stateNumericCharacterReferenceEnd(cp);
3826
- }
3827
- }
3828
- // Numeric character reference end state
3829
- //------------------------------------------------------------------
3830
- _stateNumericCharacterReferenceEnd(cp) {
3831
- if (this.charRefCode === CODE_POINTS.NULL) {
3832
- this._err(ERR.nullCharacterReference);
3833
- this.charRefCode = CODE_POINTS.REPLACEMENT_CHARACTER;
3834
- }
3835
- else if (this.charRefCode > 1114111) {
3836
- this._err(ERR.characterReferenceOutsideUnicodeRange);
3837
- this.charRefCode = CODE_POINTS.REPLACEMENT_CHARACTER;
3838
- }
3839
- else if (isSurrogate(this.charRefCode)) {
3840
- this._err(ERR.surrogateCharacterReference);
3841
- this.charRefCode = CODE_POINTS.REPLACEMENT_CHARACTER;
3842
- }
3843
- else if (isUndefinedCodePoint(this.charRefCode)) {
3844
- this._err(ERR.noncharacterCharacterReference);
3845
- }
3846
- else if (isControlCodePoint(this.charRefCode) || this.charRefCode === CODE_POINTS.CARRIAGE_RETURN) {
3847
- this._err(ERR.controlCharacterReference);
3848
- const replacement = C1_CONTROLS_REFERENCE_REPLACEMENTS.get(this.charRefCode);
3849
- if (replacement !== undefined) {
3850
- this.charRefCode = replacement;
3851
- }
3852
- }
3853
- this._flushCodePointConsumedAsCharacterReference(this.charRefCode);
3854
- this._reconsumeInState(this.returnState, cp);
3855
- }
3856
4036
  }
3857
4037
 
3858
4038
  //Element utils
@@ -3868,31 +4048,25 @@ const IMPLICIT_END_TAG_REQUIRED_THOROUGHLY = new Set([
3868
4048
  TAG_ID.THEAD,
3869
4049
  TAG_ID.TR,
3870
4050
  ]);
3871
- const SCOPING_ELEMENT_NS = new Map([
3872
- [TAG_ID.APPLET, NS.HTML],
3873
- [TAG_ID.CAPTION, NS.HTML],
3874
- [TAG_ID.HTML, NS.HTML],
3875
- [TAG_ID.MARQUEE, NS.HTML],
3876
- [TAG_ID.OBJECT, NS.HTML],
3877
- [TAG_ID.TABLE, NS.HTML],
3878
- [TAG_ID.TD, NS.HTML],
3879
- [TAG_ID.TEMPLATE, NS.HTML],
3880
- [TAG_ID.TH, NS.HTML],
3881
- [TAG_ID.ANNOTATION_XML, NS.MATHML],
3882
- [TAG_ID.MI, NS.MATHML],
3883
- [TAG_ID.MN, NS.MATHML],
3884
- [TAG_ID.MO, NS.MATHML],
3885
- [TAG_ID.MS, NS.MATHML],
3886
- [TAG_ID.MTEXT, NS.MATHML],
3887
- [TAG_ID.DESC, NS.SVG],
3888
- [TAG_ID.FOREIGN_OBJECT, NS.SVG],
3889
- [TAG_ID.TITLE, NS.SVG],
4051
+ const SCOPING_ELEMENTS_HTML = new Set([
4052
+ TAG_ID.APPLET,
4053
+ TAG_ID.CAPTION,
4054
+ TAG_ID.HTML,
4055
+ TAG_ID.MARQUEE,
4056
+ TAG_ID.OBJECT,
4057
+ TAG_ID.TABLE,
4058
+ TAG_ID.TD,
4059
+ TAG_ID.TEMPLATE,
4060
+ TAG_ID.TH,
3890
4061
  ]);
3891
- const NAMED_HEADERS = [TAG_ID.H1, TAG_ID.H2, TAG_ID.H3, TAG_ID.H4, TAG_ID.H5, TAG_ID.H6];
3892
- const TABLE_ROW_CONTEXT = [TAG_ID.TR, TAG_ID.TEMPLATE, TAG_ID.HTML];
3893
- const TABLE_BODY_CONTEXT = [TAG_ID.TBODY, TAG_ID.TFOOT, TAG_ID.THEAD, TAG_ID.TEMPLATE, TAG_ID.HTML];
3894
- const TABLE_CONTEXT = [TAG_ID.TABLE, TAG_ID.TEMPLATE, TAG_ID.HTML];
3895
- const TABLE_CELLS = [TAG_ID.TD, TAG_ID.TH];
4062
+ const SCOPING_ELEMENTS_HTML_LIST = new Set([...SCOPING_ELEMENTS_HTML, TAG_ID.OL, TAG_ID.UL]);
4063
+ const SCOPING_ELEMENTS_HTML_BUTTON = new Set([...SCOPING_ELEMENTS_HTML, TAG_ID.BUTTON]);
4064
+ const SCOPING_ELEMENTS_MATHML = new Set([TAG_ID.ANNOTATION_XML, TAG_ID.MI, TAG_ID.MN, TAG_ID.MO, TAG_ID.MS, TAG_ID.MTEXT]);
4065
+ const SCOPING_ELEMENTS_SVG = new Set([TAG_ID.DESC, TAG_ID.FOREIGN_OBJECT, TAG_ID.TITLE]);
4066
+ const TABLE_ROW_CONTEXT = new Set([TAG_ID.TR, TAG_ID.TEMPLATE, TAG_ID.HTML]);
4067
+ const TABLE_BODY_CONTEXT = new Set([TAG_ID.TBODY, TAG_ID.TFOOT, TAG_ID.THEAD, TAG_ID.TEMPLATE, TAG_ID.HTML]);
4068
+ const TABLE_CONTEXT = new Set([TAG_ID.TABLE, TAG_ID.TEMPLATE, TAG_ID.HTML]);
4069
+ const TABLE_CELLS = new Set([TAG_ID.TD, TAG_ID.TH]);
3896
4070
  //Stack of open elements
3897
4071
  class OpenElementStack {
3898
4072
  get currentTmplContentOrNode() {
@@ -3985,7 +4159,7 @@ class OpenElementStack {
3985
4159
  this.shortenToLength(idx < 0 ? 0 : idx);
3986
4160
  }
3987
4161
  popUntilNumberedHeaderPopped() {
3988
- this.popUntilPopped(NAMED_HEADERS, NS.HTML);
4162
+ this.popUntilPopped(NUMBERED_HEADERS, NS.HTML);
3989
4163
  }
3990
4164
  popUntilTableCellPopped() {
3991
4165
  this.popUntilPopped(TABLE_CELLS, NS.HTML);
@@ -3998,7 +4172,7 @@ class OpenElementStack {
3998
4172
  }
3999
4173
  _indexOfTagNames(tagNames, namespace) {
4000
4174
  for (let i = this.stackTop; i >= 0; i--) {
4001
- if (tagNames.includes(this.tagIDs[i]) && this.treeAdapter.getNamespaceURI(this.items[i]) === namespace) {
4175
+ if (tagNames.has(this.tagIDs[i]) && this.treeAdapter.getNamespaceURI(this.items[i]) === namespace) {
4002
4176
  return i;
4003
4177
  }
4004
4178
  }
@@ -4048,102 +4222,117 @@ class OpenElementStack {
4048
4222
  return this.stackTop === 0 && this.tagIDs[0] === TAG_ID.HTML;
4049
4223
  }
4050
4224
  //Element in scope
4051
- hasInScope(tagName) {
4225
+ hasInDynamicScope(tagName, htmlScope) {
4052
4226
  for (let i = this.stackTop; i >= 0; i--) {
4053
4227
  const tn = this.tagIDs[i];
4054
- const ns = this.treeAdapter.getNamespaceURI(this.items[i]);
4055
- if (tn === tagName && ns === NS.HTML) {
4056
- return true;
4057
- }
4058
- if (SCOPING_ELEMENT_NS.get(tn) === ns) {
4059
- return false;
4228
+ switch (this.treeAdapter.getNamespaceURI(this.items[i])) {
4229
+ case NS.HTML: {
4230
+ if (tn === tagName)
4231
+ return true;
4232
+ if (htmlScope.has(tn))
4233
+ return false;
4234
+ break;
4235
+ }
4236
+ case NS.SVG: {
4237
+ if (SCOPING_ELEMENTS_SVG.has(tn))
4238
+ return false;
4239
+ break;
4240
+ }
4241
+ case NS.MATHML: {
4242
+ if (SCOPING_ELEMENTS_MATHML.has(tn))
4243
+ return false;
4244
+ break;
4245
+ }
4060
4246
  }
4061
4247
  }
4062
4248
  return true;
4063
4249
  }
4064
- hasNumberedHeaderInScope() {
4065
- for (let i = this.stackTop; i >= 0; i--) {
4066
- const tn = this.tagIDs[i];
4067
- const ns = this.treeAdapter.getNamespaceURI(this.items[i]);
4068
- if (isNumberedHeader(tn) && ns === NS.HTML) {
4069
- return true;
4070
- }
4071
- if (SCOPING_ELEMENT_NS.get(tn) === ns) {
4072
- return false;
4073
- }
4074
- }
4075
- return true;
4250
+ hasInScope(tagName) {
4251
+ return this.hasInDynamicScope(tagName, SCOPING_ELEMENTS_HTML);
4076
4252
  }
4077
4253
  hasInListItemScope(tagName) {
4078
- for (let i = this.stackTop; i >= 0; i--) {
4079
- const tn = this.tagIDs[i];
4080
- const ns = this.treeAdapter.getNamespaceURI(this.items[i]);
4081
- if (tn === tagName && ns === NS.HTML) {
4082
- return true;
4083
- }
4084
- if (((tn === TAG_ID.UL || tn === TAG_ID.OL) && ns === NS.HTML) || SCOPING_ELEMENT_NS.get(tn) === ns) {
4085
- return false;
4086
- }
4087
- }
4088
- return true;
4254
+ return this.hasInDynamicScope(tagName, SCOPING_ELEMENTS_HTML_LIST);
4089
4255
  }
4090
4256
  hasInButtonScope(tagName) {
4257
+ return this.hasInDynamicScope(tagName, SCOPING_ELEMENTS_HTML_BUTTON);
4258
+ }
4259
+ hasNumberedHeaderInScope() {
4091
4260
  for (let i = this.stackTop; i >= 0; i--) {
4092
4261
  const tn = this.tagIDs[i];
4093
- const ns = this.treeAdapter.getNamespaceURI(this.items[i]);
4094
- if (tn === tagName && ns === NS.HTML) {
4095
- return true;
4096
- }
4097
- if ((tn === TAG_ID.BUTTON && ns === NS.HTML) || SCOPING_ELEMENT_NS.get(tn) === ns) {
4098
- return false;
4262
+ switch (this.treeAdapter.getNamespaceURI(this.items[i])) {
4263
+ case NS.HTML: {
4264
+ if (NUMBERED_HEADERS.has(tn))
4265
+ return true;
4266
+ if (SCOPING_ELEMENTS_HTML.has(tn))
4267
+ return false;
4268
+ break;
4269
+ }
4270
+ case NS.SVG: {
4271
+ if (SCOPING_ELEMENTS_SVG.has(tn))
4272
+ return false;
4273
+ break;
4274
+ }
4275
+ case NS.MATHML: {
4276
+ if (SCOPING_ELEMENTS_MATHML.has(tn))
4277
+ return false;
4278
+ break;
4279
+ }
4099
4280
  }
4100
4281
  }
4101
4282
  return true;
4102
4283
  }
4103
4284
  hasInTableScope(tagName) {
4104
4285
  for (let i = this.stackTop; i >= 0; i--) {
4105
- const tn = this.tagIDs[i];
4106
- const ns = this.treeAdapter.getNamespaceURI(this.items[i]);
4107
- if (ns !== NS.HTML) {
4286
+ if (this.treeAdapter.getNamespaceURI(this.items[i]) !== NS.HTML) {
4108
4287
  continue;
4109
4288
  }
4110
- if (tn === tagName) {
4111
- return true;
4112
- }
4113
- if (tn === TAG_ID.TABLE || tn === TAG_ID.TEMPLATE || tn === TAG_ID.HTML) {
4114
- return false;
4289
+ switch (this.tagIDs[i]) {
4290
+ case tagName: {
4291
+ return true;
4292
+ }
4293
+ case TAG_ID.TABLE:
4294
+ case TAG_ID.HTML: {
4295
+ return false;
4296
+ }
4115
4297
  }
4116
4298
  }
4117
4299
  return true;
4118
4300
  }
4119
4301
  hasTableBodyContextInTableScope() {
4120
4302
  for (let i = this.stackTop; i >= 0; i--) {
4121
- const tn = this.tagIDs[i];
4122
- const ns = this.treeAdapter.getNamespaceURI(this.items[i]);
4123
- if (ns !== NS.HTML) {
4303
+ if (this.treeAdapter.getNamespaceURI(this.items[i]) !== NS.HTML) {
4124
4304
  continue;
4125
4305
  }
4126
- if (tn === TAG_ID.TBODY || tn === TAG_ID.THEAD || tn === TAG_ID.TFOOT) {
4127
- return true;
4128
- }
4129
- if (tn === TAG_ID.TABLE || tn === TAG_ID.HTML) {
4130
- return false;
4306
+ switch (this.tagIDs[i]) {
4307
+ case TAG_ID.TBODY:
4308
+ case TAG_ID.THEAD:
4309
+ case TAG_ID.TFOOT: {
4310
+ return true;
4311
+ }
4312
+ case TAG_ID.TABLE:
4313
+ case TAG_ID.HTML: {
4314
+ return false;
4315
+ }
4131
4316
  }
4132
4317
  }
4133
4318
  return true;
4134
4319
  }
4135
4320
  hasInSelectScope(tagName) {
4136
4321
  for (let i = this.stackTop; i >= 0; i--) {
4137
- const tn = this.tagIDs[i];
4138
- const ns = this.treeAdapter.getNamespaceURI(this.items[i]);
4139
- if (ns !== NS.HTML) {
4322
+ if (this.treeAdapter.getNamespaceURI(this.items[i]) !== NS.HTML) {
4140
4323
  continue;
4141
4324
  }
4142
- if (tn === tagName) {
4143
- return true;
4144
- }
4145
- if (tn !== TAG_ID.OPTION && tn !== TAG_ID.OPTGROUP) {
4146
- return false;
4325
+ switch (this.tagIDs[i]) {
4326
+ case tagName: {
4327
+ return true;
4328
+ }
4329
+ case TAG_ID.OPTION:
4330
+ case TAG_ID.OPTGROUP: {
4331
+ break;
4332
+ }
4333
+ default: {
4334
+ return false;
4335
+ }
4147
4336
  }
4148
4337
  }
4149
4338
  return true;
@@ -4172,7 +4361,7 @@ var EntryType;
4172
4361
  (function (EntryType) {
4173
4362
  EntryType[EntryType["Marker"] = 0] = "Marker";
4174
4363
  EntryType[EntryType["Element"] = 1] = "Element";
4175
- })(EntryType = EntryType || (EntryType = {}));
4364
+ })(EntryType || (EntryType = {}));
4176
4365
  const MARKER = { type: EntryType.Marker };
4177
4366
  //List of formatting elements
4178
4367
  class FormattingElementList {
@@ -4277,13 +4466,6 @@ class FormattingElementList {
4277
4466
  }
4278
4467
  }
4279
4468
 
4280
- function createTextNode(value) {
4281
- return {
4282
- nodeName: '#text',
4283
- value,
4284
- parentNode: null,
4285
- };
4286
- }
4287
4469
  const defaultTreeAdapter = {
4288
4470
  //Node construction
4289
4471
  createDocument() {
@@ -4316,6 +4498,13 @@ const defaultTreeAdapter = {
4316
4498
  parentNode: null,
4317
4499
  };
4318
4500
  },
4501
+ createTextNode(value) {
4502
+ return {
4503
+ nodeName: '#text',
4504
+ value,
4505
+ parentNode: null,
4506
+ };
4507
+ },
4319
4508
  //Tree mutation
4320
4509
  appendChild(parentNode, newNode) {
4321
4510
  parentNode.childNodes.push(newNode);
@@ -4371,7 +4560,7 @@ const defaultTreeAdapter = {
4371
4560
  return;
4372
4561
  }
4373
4562
  }
4374
- defaultTreeAdapter.appendChild(parentNode, createTextNode(text));
4563
+ defaultTreeAdapter.appendChild(parentNode, defaultTreeAdapter.createTextNode(text));
4375
4564
  },
4376
4565
  insertTextBefore(parentNode, text, referenceNode) {
4377
4566
  const prevNode = parentNode.childNodes[parentNode.childNodes.indexOf(referenceNode) - 1];
@@ -4379,7 +4568,7 @@ const defaultTreeAdapter = {
4379
4568
  prevNode.value += text;
4380
4569
  }
4381
4570
  else {
4382
- defaultTreeAdapter.insertBefore(parentNode, createTextNode(text), referenceNode);
4571
+ defaultTreeAdapter.insertBefore(parentNode, defaultTreeAdapter.createTextNode(text), referenceNode);
4383
4572
  }
4384
4573
  },
4385
4574
  adoptAttributes(recipient, attrs) {
@@ -4640,7 +4829,6 @@ const XML_ATTRS_ADJUSTMENT_MAP = new Map([
4640
4829
  ['xlink:show', { prefix: 'xlink', name: 'show', namespace: NS.XLINK }],
4641
4830
  ['xlink:title', { prefix: 'xlink', name: 'title', namespace: NS.XLINK }],
4642
4831
  ['xlink:type', { prefix: 'xlink', name: 'type', namespace: NS.XLINK }],
4643
- ['xml:base', { prefix: 'xml', name: 'base', namespace: NS.XML }],
4644
4832
  ['xml:lang', { prefix: 'xml', name: 'lang', namespace: NS.XML }],
4645
4833
  ['xml:space', { prefix: 'xml', name: 'space', namespace: NS.XML }],
4646
4834
  ['xmlns', { prefix: '', name: 'xmlns', namespace: NS.XMLNS }],
@@ -4842,26 +5030,41 @@ const defaultParserOptions = {
4842
5030
  };
4843
5031
  //Parser
4844
5032
  class Parser {
4845
- constructor(options, document, fragmentContext = null, scriptHandler = null) {
5033
+ constructor(options, document,
5034
+ /** @internal */
5035
+ fragmentContext = null,
5036
+ /** @internal */
5037
+ scriptHandler = null) {
4846
5038
  this.fragmentContext = fragmentContext;
4847
5039
  this.scriptHandler = scriptHandler;
4848
5040
  this.currentToken = null;
4849
5041
  this.stopped = false;
5042
+ /** @internal */
4850
5043
  this.insertionMode = InsertionMode.INITIAL;
5044
+ /** @internal */
4851
5045
  this.originalInsertionMode = InsertionMode.INITIAL;
5046
+ /** @internal */
4852
5047
  this.headElement = null;
5048
+ /** @internal */
4853
5049
  this.formElement = null;
4854
5050
  /** Indicates that the current node is not an element in the HTML namespace */
4855
5051
  this.currentNotInHTML = false;
4856
5052
  /**
4857
5053
  * The template insertion mode stack is maintained from the left.
4858
5054
  * Ie. the topmost element will always have index 0.
5055
+ *
5056
+ * @internal
4859
5057
  */
4860
5058
  this.tmplInsertionModeStack = [];
5059
+ /** @internal */
4861
5060
  this.pendingCharacterTokens = [];
5061
+ /** @internal */
4862
5062
  this.hasNonWhitespacePendingCharacterToken = false;
5063
+ /** @internal */
4863
5064
  this.framesetOk = true;
5065
+ /** @internal */
4864
5066
  this.skipNextNewLine = false;
5067
+ /** @internal */
4865
5068
  this.fosterParentingEnabled = false;
4866
5069
  this.options = {
4867
5070
  ...defaultParserOptions,
@@ -4915,6 +5118,7 @@ class Parser {
4915
5118
  return fragment;
4916
5119
  }
4917
5120
  //Errors
5121
+ /** @internal */
4918
5122
  _err(token, code, beforeToken) {
4919
5123
  var _a;
4920
5124
  if (!this.onParseError)
@@ -4932,12 +5136,14 @@ class Parser {
4932
5136
  this.onParseError(err);
4933
5137
  }
4934
5138
  //Stack events
5139
+ /** @internal */
4935
5140
  onItemPush(node, tid, isTop) {
4936
5141
  var _a, _b;
4937
5142
  (_b = (_a = this.treeAdapter).onItemPush) === null || _b === void 0 ? void 0 : _b.call(_a, node);
4938
5143
  if (isTop && this.openElements.stackTop > 0)
4939
5144
  this._setContextModes(node, tid);
4940
5145
  }
5146
+ /** @internal */
4941
5147
  onItemPop(node, isTop) {
4942
5148
  var _a, _b;
4943
5149
  if (this.options.sourceCodeLocationInfo) {
@@ -4962,6 +5168,7 @@ class Parser {
4962
5168
  this.currentNotInHTML = !isHTML;
4963
5169
  this.tokenizer.inForeignNode = !isHTML && !this._isIntegrationPoint(tid, current);
4964
5170
  }
5171
+ /** @protected */
4965
5172
  _switchToTextParsing(currentToken, nextTokenizerState) {
4966
5173
  this._insertElement(currentToken, NS.HTML);
4967
5174
  this.tokenizer.state = nextTokenizerState;
@@ -4974,11 +5181,13 @@ class Parser {
4974
5181
  this.tokenizer.state = TokenizerMode.PLAINTEXT;
4975
5182
  }
4976
5183
  //Fragment parsing
5184
+ /** @protected */
4977
5185
  _getAdjustedCurrentElement() {
4978
5186
  return this.openElements.stackTop === 0 && this.fragmentContext
4979
5187
  ? this.fragmentContext
4980
5188
  : this.openElements.current;
4981
5189
  }
5190
+ /** @protected */
4982
5191
  _findFormInFragmentContext() {
4983
5192
  let node = this.fragmentContext;
4984
5193
  while (node) {
@@ -5020,6 +5229,7 @@ class Parser {
5020
5229
  }
5021
5230
  }
5022
5231
  //Tree mutation
5232
+ /** @protected */
5023
5233
  _setDocumentType(token) {
5024
5234
  const name = token.name || '';
5025
5235
  const publicId = token.publicId || '';
@@ -5033,6 +5243,7 @@ class Parser {
5033
5243
  }
5034
5244
  }
5035
5245
  }
5246
+ /** @protected */
5036
5247
  _attachElementToTree(element, location) {
5037
5248
  if (this.options.sourceCodeLocationInfo) {
5038
5249
  const loc = location && {
@@ -5049,20 +5260,28 @@ class Parser {
5049
5260
  this.treeAdapter.appendChild(parent, element);
5050
5261
  }
5051
5262
  }
5263
+ /**
5264
+ * For self-closing tags. Add an element to the tree, but skip adding it
5265
+ * to the stack.
5266
+ */
5267
+ /** @protected */
5052
5268
  _appendElement(token, namespaceURI) {
5053
5269
  const element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs);
5054
5270
  this._attachElementToTree(element, token.location);
5055
5271
  }
5272
+ /** @protected */
5056
5273
  _insertElement(token, namespaceURI) {
5057
5274
  const element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs);
5058
5275
  this._attachElementToTree(element, token.location);
5059
5276
  this.openElements.push(element, token.tagID);
5060
5277
  }
5278
+ /** @protected */
5061
5279
  _insertFakeElement(tagName, tagID) {
5062
5280
  const element = this.treeAdapter.createElement(tagName, NS.HTML, []);
5063
5281
  this._attachElementToTree(element, null);
5064
5282
  this.openElements.push(element, tagID);
5065
5283
  }
5284
+ /** @protected */
5066
5285
  _insertTemplate(token) {
5067
5286
  const tmpl = this.treeAdapter.createElement(token.tagName, NS.HTML, token.attrs);
5068
5287
  const content = this.treeAdapter.createDocumentFragment();
@@ -5072,6 +5291,7 @@ class Parser {
5072
5291
  if (this.options.sourceCodeLocationInfo)
5073
5292
  this.treeAdapter.setNodeSourceCodeLocation(content, null);
5074
5293
  }
5294
+ /** @protected */
5075
5295
  _insertFakeRootElement() {
5076
5296
  const element = this.treeAdapter.createElement(TAG_NAMES.HTML, NS.HTML, []);
5077
5297
  if (this.options.sourceCodeLocationInfo)
@@ -5079,6 +5299,7 @@ class Parser {
5079
5299
  this.treeAdapter.appendChild(this.openElements.current, element);
5080
5300
  this.openElements.push(element, TAG_ID.HTML);
5081
5301
  }
5302
+ /** @protected */
5082
5303
  _appendCommentNode(token, parent) {
5083
5304
  const commentNode = this.treeAdapter.createCommentNode(token.data);
5084
5305
  this.treeAdapter.appendChild(parent, commentNode);
@@ -5086,6 +5307,7 @@ class Parser {
5086
5307
  this.treeAdapter.setNodeSourceCodeLocation(commentNode, token.location);
5087
5308
  }
5088
5309
  }
5310
+ /** @protected */
5089
5311
  _insertCharacters(token) {
5090
5312
  let parent;
5091
5313
  let beforeElement;
@@ -5117,12 +5339,14 @@ class Parser {
5117
5339
  this.treeAdapter.setNodeSourceCodeLocation(textNode, token.location);
5118
5340
  }
5119
5341
  }
5342
+ /** @protected */
5120
5343
  _adoptNodes(donor, recipient) {
5121
5344
  for (let child = this.treeAdapter.getFirstChild(donor); child; child = this.treeAdapter.getFirstChild(donor)) {
5122
5345
  this.treeAdapter.detachNode(child);
5123
5346
  this.treeAdapter.appendChild(recipient, child);
5124
5347
  }
5125
5348
  }
5349
+ /** @protected */
5126
5350
  _setEndLocation(element, closingToken) {
5127
5351
  if (this.treeAdapter.getNodeSourceCodeLocation(element) && closingToken.location) {
5128
5352
  const ctLoc = closingToken.location;
@@ -5172,6 +5396,7 @@ class Parser {
5172
5396
  ((token.tagID === TAG_ID.MGLYPH || token.tagID === TAG_ID.MALIGNMARK) &&
5173
5397
  !this._isIntegrationPoint(currentTagId, current, NS.HTML)));
5174
5398
  }
5399
+ /** @protected */
5175
5400
  _processToken(token) {
5176
5401
  switch (token.type) {
5177
5402
  case TokenType.CHARACTER: {
@@ -5209,12 +5434,14 @@ class Parser {
5209
5434
  }
5210
5435
  }
5211
5436
  //Integration points
5437
+ /** @protected */
5212
5438
  _isIntegrationPoint(tid, element, foreignNS) {
5213
5439
  const ns = this.treeAdapter.getNamespaceURI(element);
5214
5440
  const attrs = this.treeAdapter.getAttrList(element);
5215
5441
  return isIntegrationPoint(tid, ns, attrs, foreignNS);
5216
5442
  }
5217
5443
  //Active formatting elements reconstruction
5444
+ /** @protected */
5218
5445
  _reconstructActiveFormattingElements() {
5219
5446
  const listLength = this.activeFormattingElements.entries.length;
5220
5447
  if (listLength) {
@@ -5228,17 +5455,20 @@ class Parser {
5228
5455
  }
5229
5456
  }
5230
5457
  //Close elements
5458
+ /** @protected */
5231
5459
  _closeTableCell() {
5232
5460
  this.openElements.generateImpliedEndTags();
5233
5461
  this.openElements.popUntilTableCellPopped();
5234
5462
  this.activeFormattingElements.clearToLastMarker();
5235
5463
  this.insertionMode = InsertionMode.IN_ROW;
5236
5464
  }
5465
+ /** @protected */
5237
5466
  _closePElement() {
5238
5467
  this.openElements.generateImpliedEndTagsWithExclusion(TAG_ID.P);
5239
5468
  this.openElements.popUntilTagNamePopped(TAG_ID.P);
5240
5469
  }
5241
5470
  //Insertion modes
5471
+ /** @protected */
5242
5472
  _resetInsertionMode() {
5243
5473
  for (let i = this.openElements.stackTop; i >= 0; i--) {
5244
5474
  //Insertion mode reset map
@@ -5304,6 +5534,7 @@ class Parser {
5304
5534
  }
5305
5535
  this.insertionMode = InsertionMode.IN_BODY;
5306
5536
  }
5537
+ /** @protected */
5307
5538
  _resetInsertionModeForSelect(selectIdx) {
5308
5539
  if (selectIdx > 0) {
5309
5540
  for (let i = selectIdx - 1; i > 0; i--) {
@@ -5320,12 +5551,15 @@ class Parser {
5320
5551
  this.insertionMode = InsertionMode.IN_SELECT;
5321
5552
  }
5322
5553
  //Foster parenting
5554
+ /** @protected */
5323
5555
  _isElementCausesFosterParenting(tn) {
5324
5556
  return TABLE_STRUCTURE_TAGS.has(tn);
5325
5557
  }
5558
+ /** @protected */
5326
5559
  _shouldFosterParentOnInsertion() {
5327
5560
  return this.fosterParentingEnabled && this._isElementCausesFosterParenting(this.openElements.currentTagId);
5328
5561
  }
5562
+ /** @protected */
5329
5563
  _findFosterParentingLocation() {
5330
5564
  for (let i = this.openElements.stackTop; i >= 0; i--) {
5331
5565
  const openElement = this.openElements.items[i];
@@ -5348,6 +5582,7 @@ class Parser {
5348
5582
  }
5349
5583
  return { parent: this.openElements.items[0], beforeElement: null };
5350
5584
  }
5585
+ /** @protected */
5351
5586
  _fosterParentElement(element) {
5352
5587
  const location = this._findFosterParentingLocation();
5353
5588
  if (location.beforeElement) {
@@ -5358,10 +5593,12 @@ class Parser {
5358
5593
  }
5359
5594
  }
5360
5595
  //Special elements
5596
+ /** @protected */
5361
5597
  _isSpecialElement(element, id) {
5362
5598
  const ns = this.treeAdapter.getNamespaceURI(element);
5363
5599
  return SPECIAL_ELEMENTS[ns].has(id);
5364
5600
  }
5601
+ /** @internal */
5365
5602
  onCharacter(token) {
5366
5603
  this.skipNextNewLine = false;
5367
5604
  if (this.tokenizer.inForeignNode) {
@@ -5431,6 +5668,7 @@ class Parser {
5431
5668
  // Do nothing
5432
5669
  }
5433
5670
  }
5671
+ /** @internal */
5434
5672
  onNullCharacter(token) {
5435
5673
  this.skipNextNewLine = false;
5436
5674
  if (this.tokenizer.inForeignNode) {
@@ -5487,6 +5725,7 @@ class Parser {
5487
5725
  // Do nothing
5488
5726
  }
5489
5727
  }
5728
+ /** @internal */
5490
5729
  onComment(token) {
5491
5730
  this.skipNextNewLine = false;
5492
5731
  if (this.currentNotInHTML) {
@@ -5531,6 +5770,7 @@ class Parser {
5531
5770
  // Do nothing
5532
5771
  }
5533
5772
  }
5773
+ /** @internal */
5534
5774
  onDoctype(token) {
5535
5775
  this.skipNextNewLine = false;
5536
5776
  switch (this.insertionMode) {
@@ -5552,6 +5792,7 @@ class Parser {
5552
5792
  // Do nothing
5553
5793
  }
5554
5794
  }
5795
+ /** @internal */
5555
5796
  onStartTag(token) {
5556
5797
  this.skipNextNewLine = false;
5557
5798
  this.currentToken = token;
@@ -5569,6 +5810,7 @@ class Parser {
5569
5810
  * for nested calls.
5570
5811
  *
5571
5812
  * @param token The token to process.
5813
+ * @protected
5572
5814
  */
5573
5815
  _processStartTag(token) {
5574
5816
  if (this.shouldProcessStartTagTokenInForeignContent(token)) {
@@ -5578,6 +5820,7 @@ class Parser {
5578
5820
  this._startTagOutsideForeignContent(token);
5579
5821
  }
5580
5822
  }
5823
+ /** @protected */
5581
5824
  _startTagOutsideForeignContent(token) {
5582
5825
  switch (this.insertionMode) {
5583
5826
  case InsertionMode.INITIAL: {
@@ -5671,6 +5914,7 @@ class Parser {
5671
5914
  // Do nothing
5672
5915
  }
5673
5916
  }
5917
+ /** @internal */
5674
5918
  onEndTag(token) {
5675
5919
  this.skipNextNewLine = false;
5676
5920
  this.currentToken = token;
@@ -5681,6 +5925,7 @@ class Parser {
5681
5925
  this._endTagOutsideForeignContent(token);
5682
5926
  }
5683
5927
  }
5928
+ /** @protected */
5684
5929
  _endTagOutsideForeignContent(token) {
5685
5930
  switch (this.insertionMode) {
5686
5931
  case InsertionMode.INITIAL: {
@@ -5774,6 +6019,7 @@ class Parser {
5774
6019
  // Do nothing
5775
6020
  }
5776
6021
  }
6022
+ /** @internal */
5777
6023
  onEof(token) {
5778
6024
  switch (this.insertionMode) {
5779
6025
  case InsertionMode.INITIAL: {
@@ -5835,6 +6081,7 @@ class Parser {
5835
6081
  // Do nothing
5836
6082
  }
5837
6083
  }
6084
+ /** @internal */
5838
6085
  onWhitespaceCharacter(token) {
5839
6086
  if (this.skipNextNewLine) {
5840
6087
  this.skipNextNewLine = false;
@@ -6405,7 +6652,7 @@ function numberedHeaderStartTagInBody(p, token) {
6405
6652
  if (p.openElements.hasInButtonScope(TAG_ID.P)) {
6406
6653
  p._closePElement();
6407
6654
  }
6408
- if (isNumberedHeader(p.openElements.currentTagId)) {
6655
+ if (NUMBERED_HEADERS.has(p.openElements.currentTagId)) {
6409
6656
  p.openElements.pop();
6410
6657
  }
6411
6658
  p._insertElement(token, NS.HTML);
@@ -6567,9 +6814,9 @@ function iframeStartTagInBody(p, token) {
6567
6814
  p.framesetOk = false;
6568
6815
  p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
6569
6816
  }
6570
- //NOTE: here we assume that we always act as an user agent with enabled plugins, so we parse
6571
- //<noembed> as rawtext.
6572
- function noembedStartTagInBody(p, token) {
6817
+ //NOTE: here we assume that we always act as a user agent with enabled plugins/frames, so we parse
6818
+ //<noembed>/<noframes> as rawtext.
6819
+ function rawTextStartTagInBody(p, token) {
6573
6820
  p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
6574
6821
  }
6575
6822
  function selectStartTagInBody(p, token) {
@@ -6681,6 +6928,7 @@ function startTagInBody(p, token) {
6681
6928
  case TAG_ID.DETAILS:
6682
6929
  case TAG_ID.ADDRESS:
6683
6930
  case TAG_ID.ARTICLE:
6931
+ case TAG_ID.SEARCH:
6684
6932
  case TAG_ID.SECTION:
6685
6933
  case TAG_ID.SUMMARY:
6686
6934
  case TAG_ID.FIELDSET:
@@ -6804,8 +7052,9 @@ function startTagInBody(p, token) {
6804
7052
  optgroupStartTagInBody(p, token);
6805
7053
  break;
6806
7054
  }
6807
- case TAG_ID.NOEMBED: {
6808
- noembedStartTagInBody(p, token);
7055
+ case TAG_ID.NOEMBED:
7056
+ case TAG_ID.NOFRAMES: {
7057
+ rawTextStartTagInBody(p, token);
6809
7058
  break;
6810
7059
  }
6811
7060
  case TAG_ID.FRAMESET: {
@@ -6818,7 +7067,7 @@ function startTagInBody(p, token) {
6818
7067
  }
6819
7068
  case TAG_ID.NOSCRIPT: {
6820
7069
  if (p.options.scriptingEnabled) {
6821
- noembedStartTagInBody(p, token);
7070
+ rawTextStartTagInBody(p, token);
6822
7071
  }
6823
7072
  else {
6824
7073
  genericStartTagInBody(p, token);
@@ -6990,6 +7239,7 @@ function endTagInBody(p, token) {
6990
7239
  case TAG_ID.ADDRESS:
6991
7240
  case TAG_ID.ARTICLE:
6992
7241
  case TAG_ID.DETAILS:
7242
+ case TAG_ID.SEARCH:
6993
7243
  case TAG_ID.SECTION:
6994
7244
  case TAG_ID.SUMMARY:
6995
7245
  case TAG_ID.LISTING:
@@ -7590,6 +7840,17 @@ function startTagInSelect(p, token) {
7590
7840
  p._insertElement(token, NS.HTML);
7591
7841
  break;
7592
7842
  }
7843
+ case TAG_ID.HR: {
7844
+ if (p.openElements.currentTagId === TAG_ID.OPTION) {
7845
+ p.openElements.pop();
7846
+ }
7847
+ if (p.openElements.currentTagId === TAG_ID.OPTGROUP) {
7848
+ p.openElements.pop();
7849
+ }
7850
+ p._appendElement(token, NS.HTML);
7851
+ token.ackSelfClosing = true;
7852
+ break;
7853
+ }
7593
7854
  case TAG_ID.INPUT:
7594
7855
  case TAG_ID.KEYGEN:
7595
7856
  case TAG_ID.TEXTAREA: