vite 6.0.0-beta.3 → 6.0.0-beta.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,5 @@
1
- import { fileURLToPath as __cjs_fileURLToPath } from 'node:url';
2
- import { dirname as __cjs_dirname } from 'node:path';
3
1
  import { createRequire as __cjs_createRequire } from 'node:module';
4
2
 
5
- const __filename = __cjs_fileURLToPath(import.meta.url);
6
- const __dirname = __cjs_dirname(__filename);
7
3
  const require = __cjs_createRequire(import.meta.url);
8
4
  const __require = require;
9
5
  const UNDEFINED_CODE_POINTS = new Set([
@@ -24,7 +20,6 @@ var CODE_POINTS;
24
20
  CODE_POINTS[CODE_POINTS["SPACE"] = 32] = "SPACE";
25
21
  CODE_POINTS[CODE_POINTS["EXCLAMATION_MARK"] = 33] = "EXCLAMATION_MARK";
26
22
  CODE_POINTS[CODE_POINTS["QUOTATION_MARK"] = 34] = "QUOTATION_MARK";
27
- CODE_POINTS[CODE_POINTS["NUMBER_SIGN"] = 35] = "NUMBER_SIGN";
28
23
  CODE_POINTS[CODE_POINTS["AMPERSAND"] = 38] = "AMPERSAND";
29
24
  CODE_POINTS[CODE_POINTS["APOSTROPHE"] = 39] = "APOSTROPHE";
30
25
  CODE_POINTS[CODE_POINTS["HYPHEN_MINUS"] = 45] = "HYPHEN_MINUS";
@@ -37,17 +32,12 @@ var CODE_POINTS;
37
32
  CODE_POINTS[CODE_POINTS["GREATER_THAN_SIGN"] = 62] = "GREATER_THAN_SIGN";
38
33
  CODE_POINTS[CODE_POINTS["QUESTION_MARK"] = 63] = "QUESTION_MARK";
39
34
  CODE_POINTS[CODE_POINTS["LATIN_CAPITAL_A"] = 65] = "LATIN_CAPITAL_A";
40
- CODE_POINTS[CODE_POINTS["LATIN_CAPITAL_F"] = 70] = "LATIN_CAPITAL_F";
41
- CODE_POINTS[CODE_POINTS["LATIN_CAPITAL_X"] = 88] = "LATIN_CAPITAL_X";
42
35
  CODE_POINTS[CODE_POINTS["LATIN_CAPITAL_Z"] = 90] = "LATIN_CAPITAL_Z";
43
36
  CODE_POINTS[CODE_POINTS["RIGHT_SQUARE_BRACKET"] = 93] = "RIGHT_SQUARE_BRACKET";
44
37
  CODE_POINTS[CODE_POINTS["GRAVE_ACCENT"] = 96] = "GRAVE_ACCENT";
45
38
  CODE_POINTS[CODE_POINTS["LATIN_SMALL_A"] = 97] = "LATIN_SMALL_A";
46
- CODE_POINTS[CODE_POINTS["LATIN_SMALL_F"] = 102] = "LATIN_SMALL_F";
47
- CODE_POINTS[CODE_POINTS["LATIN_SMALL_X"] = 120] = "LATIN_SMALL_X";
48
39
  CODE_POINTS[CODE_POINTS["LATIN_SMALL_Z"] = 122] = "LATIN_SMALL_Z";
49
- CODE_POINTS[CODE_POINTS["REPLACEMENT_CHARACTER"] = 65533] = "REPLACEMENT_CHARACTER";
50
- })(CODE_POINTS = CODE_POINTS || (CODE_POINTS = {}));
40
+ })(CODE_POINTS || (CODE_POINTS = {}));
51
41
  const SEQUENCES = {
52
42
  DASH_DASH: '--',
53
43
  CDATA_START: '[CDATA[',
@@ -137,7 +127,7 @@ var ERR;
137
127
  ERR["misplacedStartTagForHeadElement"] = "misplaced-start-tag-for-head-element";
138
128
  ERR["nestedNoscriptInHead"] = "nested-noscript-in-head";
139
129
  ERR["eofInElementThatCanContainOnlyText"] = "eof-in-element-that-can-contain-only-text";
140
- })(ERR = ERR || (ERR = {}));
130
+ })(ERR || (ERR = {}));
141
131
 
142
132
  //Const
143
133
  const DEFAULT_BUFFER_WATERLINE = 1 << 16;
@@ -170,22 +160,24 @@ class Preprocessor {
170
160
  get offset() {
171
161
  return this.droppedBufferSize + this.pos;
172
162
  }
173
- getError(code) {
163
+ getError(code, cpOffset) {
174
164
  const { line, col, offset } = this;
165
+ const startCol = col + cpOffset;
166
+ const startOffset = offset + cpOffset;
175
167
  return {
176
168
  code,
177
169
  startLine: line,
178
170
  endLine: line,
179
- startCol: col,
180
- endCol: col,
181
- startOffset: offset,
182
- endOffset: offset,
171
+ startCol,
172
+ endCol: startCol,
173
+ startOffset,
174
+ endOffset: startOffset,
183
175
  };
184
176
  }
185
177
  _err(code) {
186
178
  if (this.handler.onParseError && this.lastErrOffset !== this.offset) {
187
179
  this.lastErrOffset = this.offset;
188
- this.handler.onParseError(this.getError(code));
180
+ this.handler.onParseError(this.getError(code, 0));
189
181
  }
190
182
  }
191
183
  _addGap() {
@@ -343,7 +335,7 @@ var TokenType;
343
335
  TokenType[TokenType["DOCTYPE"] = 6] = "DOCTYPE";
344
336
  TokenType[TokenType["EOF"] = 7] = "EOF";
345
337
  TokenType[TokenType["HIBERNATION"] = 8] = "HIBERNATION";
346
- })(TokenType = TokenType || (TokenType = {}));
338
+ })(TokenType || (TokenType = {}));
347
339
  function getTokenAttr(token, attrName) {
348
340
  for (let i = token.attrs.length - 1; i >= 0; i--) {
349
341
  if (token.attrs[i].name === attrName) {
@@ -367,6 +359,51 @@ new Uint16Array(
367
359
  .split("")
368
360
  .map((c) => c.charCodeAt(0)));
369
361
 
362
+ // Adapted from https://github.com/mathiasbynens/he/blob/36afe179392226cf1b6ccdb16ebbb7a5a844d93a/src/he.js#L106-L134
363
+ const decodeMap = new Map([
364
+ [0, 65533],
365
+ // C1 Unicode control character reference replacements
366
+ [128, 8364],
367
+ [130, 8218],
368
+ [131, 402],
369
+ [132, 8222],
370
+ [133, 8230],
371
+ [134, 8224],
372
+ [135, 8225],
373
+ [136, 710],
374
+ [137, 8240],
375
+ [138, 352],
376
+ [139, 8249],
377
+ [140, 338],
378
+ [142, 381],
379
+ [145, 8216],
380
+ [146, 8217],
381
+ [147, 8220],
382
+ [148, 8221],
383
+ [149, 8226],
384
+ [150, 8211],
385
+ [151, 8212],
386
+ [152, 732],
387
+ [153, 8482],
388
+ [154, 353],
389
+ [155, 8250],
390
+ [156, 339],
391
+ [158, 382],
392
+ [159, 376],
393
+ ]);
394
+ /**
395
+ * Replace the given code point with a replacement character if it is a
396
+ * surrogate or is outside the valid range. Otherwise return the code
397
+ * point unchanged.
398
+ */
399
+ function replaceCodePoint(codePoint) {
400
+ var _a;
401
+ if ((codePoint >= 0xd800 && codePoint <= 0xdfff) || codePoint > 0x10ffff) {
402
+ return 0xfffd;
403
+ }
404
+ return (_a = decodeMap.get(codePoint)) !== null && _a !== void 0 ? _a : codePoint;
405
+ }
406
+
370
407
  var CharCodes;
371
408
  (function (CharCodes) {
372
409
  CharCodes[CharCodes["NUM"] = 35] = "NUM";
@@ -382,12 +419,35 @@ var CharCodes;
382
419
  CharCodes[CharCodes["UPPER_F"] = 70] = "UPPER_F";
383
420
  CharCodes[CharCodes["UPPER_Z"] = 90] = "UPPER_Z";
384
421
  })(CharCodes || (CharCodes = {}));
422
+ /** Bit that needs to be set to convert an upper case ASCII character to lower case */
423
+ const TO_LOWER_BIT = 0b100000;
385
424
  var BinTrieFlags;
386
425
  (function (BinTrieFlags) {
387
426
  BinTrieFlags[BinTrieFlags["VALUE_LENGTH"] = 49152] = "VALUE_LENGTH";
388
427
  BinTrieFlags[BinTrieFlags["BRANCH_LENGTH"] = 16256] = "BRANCH_LENGTH";
389
428
  BinTrieFlags[BinTrieFlags["JUMP_TABLE"] = 127] = "JUMP_TABLE";
390
429
  })(BinTrieFlags || (BinTrieFlags = {}));
430
+ function isNumber(code) {
431
+ return code >= CharCodes.ZERO && code <= CharCodes.NINE;
432
+ }
433
+ function isHexadecimalCharacter(code) {
434
+ return ((code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_F) ||
435
+ (code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_F));
436
+ }
437
+ function isAsciiAlphaNumeric$1(code) {
438
+ return ((code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_Z) ||
439
+ (code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_Z) ||
440
+ isNumber(code));
441
+ }
442
+ /**
443
+ * Checks if the given character is a valid end character for an entity in an attribute.
444
+ *
445
+ * Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error.
446
+ * See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
447
+ */
448
+ function isEntityInAttributeInvalidEnd(code) {
449
+ return code === CharCodes.EQUALS || isAsciiAlphaNumeric$1(code);
450
+ }
391
451
  var EntityDecoderState;
392
452
  (function (EntityDecoderState) {
393
453
  EntityDecoderState[EntityDecoderState["EntityStart"] = 0] = "EntityStart";
@@ -405,6 +465,320 @@ var DecodingMode;
405
465
  /** Entities in attributes have limitations on ending characters. */
406
466
  DecodingMode[DecodingMode["Attribute"] = 2] = "Attribute";
407
467
  })(DecodingMode || (DecodingMode = {}));
468
+ /**
469
+ * Token decoder with support of writing partial entities.
470
+ */
471
+ class EntityDecoder {
472
+ constructor(
473
+ /** The tree used to decode entities. */
474
+ decodeTree,
475
+ /**
476
+ * The function that is called when a codepoint is decoded.
477
+ *
478
+ * For multi-byte named entities, this will be called multiple times,
479
+ * with the second codepoint, and the same `consumed` value.
480
+ *
481
+ * @param codepoint The decoded codepoint.
482
+ * @param consumed The number of bytes consumed by the decoder.
483
+ */
484
+ emitCodePoint,
485
+ /** An object that is used to produce errors. */
486
+ errors) {
487
+ this.decodeTree = decodeTree;
488
+ this.emitCodePoint = emitCodePoint;
489
+ this.errors = errors;
490
+ /** The current state of the decoder. */
491
+ this.state = EntityDecoderState.EntityStart;
492
+ /** Characters that were consumed while parsing an entity. */
493
+ this.consumed = 1;
494
+ /**
495
+ * The result of the entity.
496
+ *
497
+ * Either the result index of a numeric entity, or the codepoint of a
498
+ * numeric entity.
499
+ */
500
+ this.result = 0;
501
+ /** The current index in the decode tree. */
502
+ this.treeIndex = 0;
503
+ /** The number of characters that were consumed in excess. */
504
+ this.excess = 1;
505
+ /** The mode in which the decoder is operating. */
506
+ this.decodeMode = DecodingMode.Strict;
507
+ }
508
+ /** Resets the instance to make it reusable. */
509
+ startEntity(decodeMode) {
510
+ this.decodeMode = decodeMode;
511
+ this.state = EntityDecoderState.EntityStart;
512
+ this.result = 0;
513
+ this.treeIndex = 0;
514
+ this.excess = 1;
515
+ this.consumed = 1;
516
+ }
517
+ /**
518
+ * Write an entity to the decoder. This can be called multiple times with partial entities.
519
+ * If the entity is incomplete, the decoder will return -1.
520
+ *
521
+ * Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
522
+ * entity is incomplete, and resume when the next string is written.
523
+ *
524
+ * @param string The string containing the entity (or a continuation of the entity).
525
+ * @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
526
+ * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
527
+ */
528
+ write(str, offset) {
529
+ switch (this.state) {
530
+ case EntityDecoderState.EntityStart: {
531
+ if (str.charCodeAt(offset) === CharCodes.NUM) {
532
+ this.state = EntityDecoderState.NumericStart;
533
+ this.consumed += 1;
534
+ return this.stateNumericStart(str, offset + 1);
535
+ }
536
+ this.state = EntityDecoderState.NamedEntity;
537
+ return this.stateNamedEntity(str, offset);
538
+ }
539
+ case EntityDecoderState.NumericStart: {
540
+ return this.stateNumericStart(str, offset);
541
+ }
542
+ case EntityDecoderState.NumericDecimal: {
543
+ return this.stateNumericDecimal(str, offset);
544
+ }
545
+ case EntityDecoderState.NumericHex: {
546
+ return this.stateNumericHex(str, offset);
547
+ }
548
+ case EntityDecoderState.NamedEntity: {
549
+ return this.stateNamedEntity(str, offset);
550
+ }
551
+ }
552
+ }
553
+ /**
554
+ * Switches between the numeric decimal and hexadecimal states.
555
+ *
556
+ * Equivalent to the `Numeric character reference state` in the HTML spec.
557
+ *
558
+ * @param str The string containing the entity (or a continuation of the entity).
559
+ * @param offset The current offset.
560
+ * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
561
+ */
562
+ stateNumericStart(str, offset) {
563
+ if (offset >= str.length) {
564
+ return -1;
565
+ }
566
+ if ((str.charCodeAt(offset) | TO_LOWER_BIT) === CharCodes.LOWER_X) {
567
+ this.state = EntityDecoderState.NumericHex;
568
+ this.consumed += 1;
569
+ return this.stateNumericHex(str, offset + 1);
570
+ }
571
+ this.state = EntityDecoderState.NumericDecimal;
572
+ return this.stateNumericDecimal(str, offset);
573
+ }
574
+ addToNumericResult(str, start, end, base) {
575
+ if (start !== end) {
576
+ const digitCount = end - start;
577
+ this.result =
578
+ this.result * Math.pow(base, digitCount) +
579
+ parseInt(str.substr(start, digitCount), base);
580
+ this.consumed += digitCount;
581
+ }
582
+ }
583
+ /**
584
+ * Parses a hexadecimal numeric entity.
585
+ *
586
+ * Equivalent to the `Hexademical character reference state` in the HTML spec.
587
+ *
588
+ * @param str The string containing the entity (or a continuation of the entity).
589
+ * @param offset The current offset.
590
+ * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
591
+ */
592
+ stateNumericHex(str, offset) {
593
+ const startIdx = offset;
594
+ while (offset < str.length) {
595
+ const char = str.charCodeAt(offset);
596
+ if (isNumber(char) || isHexadecimalCharacter(char)) {
597
+ offset += 1;
598
+ }
599
+ else {
600
+ this.addToNumericResult(str, startIdx, offset, 16);
601
+ return this.emitNumericEntity(char, 3);
602
+ }
603
+ }
604
+ this.addToNumericResult(str, startIdx, offset, 16);
605
+ return -1;
606
+ }
607
+ /**
608
+ * Parses a decimal numeric entity.
609
+ *
610
+ * Equivalent to the `Decimal character reference state` in the HTML spec.
611
+ *
612
+ * @param str The string containing the entity (or a continuation of the entity).
613
+ * @param offset The current offset.
614
+ * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
615
+ */
616
+ stateNumericDecimal(str, offset) {
617
+ const startIdx = offset;
618
+ while (offset < str.length) {
619
+ const char = str.charCodeAt(offset);
620
+ if (isNumber(char)) {
621
+ offset += 1;
622
+ }
623
+ else {
624
+ this.addToNumericResult(str, startIdx, offset, 10);
625
+ return this.emitNumericEntity(char, 2);
626
+ }
627
+ }
628
+ this.addToNumericResult(str, startIdx, offset, 10);
629
+ return -1;
630
+ }
631
+ /**
632
+ * Validate and emit a numeric entity.
633
+ *
634
+ * Implements the logic from the `Hexademical character reference start
635
+ * state` and `Numeric character reference end state` in the HTML spec.
636
+ *
637
+ * @param lastCp The last code point of the entity. Used to see if the
638
+ * entity was terminated with a semicolon.
639
+ * @param expectedLength The minimum number of characters that should be
640
+ * consumed. Used to validate that at least one digit
641
+ * was consumed.
642
+ * @returns The number of characters that were consumed.
643
+ */
644
+ emitNumericEntity(lastCp, expectedLength) {
645
+ var _a;
646
+ // Ensure we consumed at least one digit.
647
+ if (this.consumed <= expectedLength) {
648
+ (_a = this.errors) === null || _a === void 0 ? void 0 : _a.absenceOfDigitsInNumericCharacterReference(this.consumed);
649
+ return 0;
650
+ }
651
+ // Figure out if this is a legit end of the entity
652
+ if (lastCp === CharCodes.SEMI) {
653
+ this.consumed += 1;
654
+ }
655
+ else if (this.decodeMode === DecodingMode.Strict) {
656
+ return 0;
657
+ }
658
+ this.emitCodePoint(replaceCodePoint(this.result), this.consumed);
659
+ if (this.errors) {
660
+ if (lastCp !== CharCodes.SEMI) {
661
+ this.errors.missingSemicolonAfterCharacterReference();
662
+ }
663
+ this.errors.validateNumericCharacterReference(this.result);
664
+ }
665
+ return this.consumed;
666
+ }
667
+ /**
668
+ * Parses a named entity.
669
+ *
670
+ * Equivalent to the `Named character reference state` in the HTML spec.
671
+ *
672
+ * @param str The string containing the entity (or a continuation of the entity).
673
+ * @param offset The current offset.
674
+ * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
675
+ */
676
+ stateNamedEntity(str, offset) {
677
+ const { decodeTree } = this;
678
+ let current = decodeTree[this.treeIndex];
679
+ // The mask is the number of bytes of the value, including the current byte.
680
+ let valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
681
+ for (; offset < str.length; offset++, this.excess++) {
682
+ const char = str.charCodeAt(offset);
683
+ this.treeIndex = determineBranch(decodeTree, current, this.treeIndex + Math.max(1, valueLength), char);
684
+ if (this.treeIndex < 0) {
685
+ return this.result === 0 ||
686
+ // If we are parsing an attribute
687
+ (this.decodeMode === DecodingMode.Attribute &&
688
+ // We shouldn't have consumed any characters after the entity,
689
+ (valueLength === 0 ||
690
+ // And there should be no invalid characters.
691
+ isEntityInAttributeInvalidEnd(char)))
692
+ ? 0
693
+ : this.emitNotTerminatedNamedEntity();
694
+ }
695
+ current = decodeTree[this.treeIndex];
696
+ valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
697
+ // If the branch is a value, store it and continue
698
+ if (valueLength !== 0) {
699
+ // If the entity is terminated by a semicolon, we are done.
700
+ if (char === CharCodes.SEMI) {
701
+ return this.emitNamedEntityData(this.treeIndex, valueLength, this.consumed + this.excess);
702
+ }
703
+ // If we encounter a non-terminated (legacy) entity while parsing strictly, then ignore it.
704
+ if (this.decodeMode !== DecodingMode.Strict) {
705
+ this.result = this.treeIndex;
706
+ this.consumed += this.excess;
707
+ this.excess = 0;
708
+ }
709
+ }
710
+ }
711
+ return -1;
712
+ }
713
+ /**
714
+ * Emit a named entity that was not terminated with a semicolon.
715
+ *
716
+ * @returns The number of characters consumed.
717
+ */
718
+ emitNotTerminatedNamedEntity() {
719
+ var _a;
720
+ const { result, decodeTree } = this;
721
+ const valueLength = (decodeTree[result] & BinTrieFlags.VALUE_LENGTH) >> 14;
722
+ this.emitNamedEntityData(result, valueLength, this.consumed);
723
+ (_a = this.errors) === null || _a === void 0 ? void 0 : _a.missingSemicolonAfterCharacterReference();
724
+ return this.consumed;
725
+ }
726
+ /**
727
+ * Emit a named entity.
728
+ *
729
+ * @param result The index of the entity in the decode tree.
730
+ * @param valueLength The number of bytes in the entity.
731
+ * @param consumed The number of characters consumed.
732
+ *
733
+ * @returns The number of characters consumed.
734
+ */
735
+ emitNamedEntityData(result, valueLength, consumed) {
736
+ const { decodeTree } = this;
737
+ this.emitCodePoint(valueLength === 1
738
+ ? decodeTree[result] & ~BinTrieFlags.VALUE_LENGTH
739
+ : decodeTree[result + 1], consumed);
740
+ if (valueLength === 3) {
741
+ // For multi-byte values, we need to emit the second byte.
742
+ this.emitCodePoint(decodeTree[result + 2], consumed);
743
+ }
744
+ return consumed;
745
+ }
746
+ /**
747
+ * Signal to the parser that the end of the input was reached.
748
+ *
749
+ * Remaining data will be emitted and relevant errors will be produced.
750
+ *
751
+ * @returns The number of characters consumed.
752
+ */
753
+ end() {
754
+ var _a;
755
+ switch (this.state) {
756
+ case EntityDecoderState.NamedEntity: {
757
+ // Emit a named entity if we have one.
758
+ return this.result !== 0 &&
759
+ (this.decodeMode !== DecodingMode.Attribute ||
760
+ this.result === this.treeIndex)
761
+ ? this.emitNotTerminatedNamedEntity()
762
+ : 0;
763
+ }
764
+ // Otherwise, emit a numeric entity if we have one.
765
+ case EntityDecoderState.NumericDecimal: {
766
+ return this.emitNumericEntity(0, 2);
767
+ }
768
+ case EntityDecoderState.NumericHex: {
769
+ return this.emitNumericEntity(0, 3);
770
+ }
771
+ case EntityDecoderState.NumericStart: {
772
+ (_a = this.errors) === null || _a === void 0 ? void 0 : _a.absenceOfDigitsInNumericCharacterReference(this.consumed);
773
+ return 0;
774
+ }
775
+ case EntityDecoderState.EntityStart: {
776
+ // Return 0 if we have no entity.
777
+ return 0;
778
+ }
779
+ }
780
+ }
781
+ }
408
782
  /**
409
783
  * Determines the branch of the current node that is taken given the current
410
784
  * character. This function is used to traverse the trie.
@@ -458,7 +832,7 @@ var NS;
458
832
  NS["XLINK"] = "http://www.w3.org/1999/xlink";
459
833
  NS["XML"] = "http://www.w3.org/XML/1998/namespace";
460
834
  NS["XMLNS"] = "http://www.w3.org/2000/xmlns/";
461
- })(NS = NS || (NS = {}));
835
+ })(NS || (NS = {}));
462
836
  var ATTRS;
463
837
  (function (ATTRS) {
464
838
  ATTRS["TYPE"] = "type";
@@ -469,7 +843,7 @@ var ATTRS;
469
843
  ATTRS["COLOR"] = "color";
470
844
  ATTRS["FACE"] = "face";
471
845
  ATTRS["SIZE"] = "size";
472
- })(ATTRS = ATTRS || (ATTRS = {}));
846
+ })(ATTRS || (ATTRS = {}));
473
847
  /**
474
848
  * The mode of the document.
475
849
  *
@@ -480,7 +854,7 @@ var DOCUMENT_MODE;
480
854
  DOCUMENT_MODE["NO_QUIRKS"] = "no-quirks";
481
855
  DOCUMENT_MODE["QUIRKS"] = "quirks";
482
856
  DOCUMENT_MODE["LIMITED_QUIRKS"] = "limited-quirks";
483
- })(DOCUMENT_MODE = DOCUMENT_MODE || (DOCUMENT_MODE = {}));
857
+ })(DOCUMENT_MODE || (DOCUMENT_MODE = {}));
484
858
  var TAG_NAMES;
485
859
  (function (TAG_NAMES) {
486
860
  TAG_NAMES["A"] = "a";
@@ -576,6 +950,7 @@ var TAG_NAMES;
576
950
  TAG_NAMES["RUBY"] = "ruby";
577
951
  TAG_NAMES["S"] = "s";
578
952
  TAG_NAMES["SCRIPT"] = "script";
953
+ TAG_NAMES["SEARCH"] = "search";
579
954
  TAG_NAMES["SECTION"] = "section";
580
955
  TAG_NAMES["SELECT"] = "select";
581
956
  TAG_NAMES["SOURCE"] = "source";
@@ -605,7 +980,7 @@ var TAG_NAMES;
605
980
  TAG_NAMES["VAR"] = "var";
606
981
  TAG_NAMES["WBR"] = "wbr";
607
982
  TAG_NAMES["XMP"] = "xmp";
608
- })(TAG_NAMES = TAG_NAMES || (TAG_NAMES = {}));
983
+ })(TAG_NAMES || (TAG_NAMES = {}));
609
984
  /**
610
985
  * Tag IDs are numeric IDs for known tag names.
611
986
  *
@@ -707,36 +1082,37 @@ var TAG_ID;
707
1082
  TAG_ID[TAG_ID["RUBY"] = 91] = "RUBY";
708
1083
  TAG_ID[TAG_ID["S"] = 92] = "S";
709
1084
  TAG_ID[TAG_ID["SCRIPT"] = 93] = "SCRIPT";
710
- TAG_ID[TAG_ID["SECTION"] = 94] = "SECTION";
711
- TAG_ID[TAG_ID["SELECT"] = 95] = "SELECT";
712
- TAG_ID[TAG_ID["SOURCE"] = 96] = "SOURCE";
713
- TAG_ID[TAG_ID["SMALL"] = 97] = "SMALL";
714
- TAG_ID[TAG_ID["SPAN"] = 98] = "SPAN";
715
- TAG_ID[TAG_ID["STRIKE"] = 99] = "STRIKE";
716
- TAG_ID[TAG_ID["STRONG"] = 100] = "STRONG";
717
- TAG_ID[TAG_ID["STYLE"] = 101] = "STYLE";
718
- TAG_ID[TAG_ID["SUB"] = 102] = "SUB";
719
- TAG_ID[TAG_ID["SUMMARY"] = 103] = "SUMMARY";
720
- TAG_ID[TAG_ID["SUP"] = 104] = "SUP";
721
- TAG_ID[TAG_ID["TABLE"] = 105] = "TABLE";
722
- TAG_ID[TAG_ID["TBODY"] = 106] = "TBODY";
723
- TAG_ID[TAG_ID["TEMPLATE"] = 107] = "TEMPLATE";
724
- TAG_ID[TAG_ID["TEXTAREA"] = 108] = "TEXTAREA";
725
- TAG_ID[TAG_ID["TFOOT"] = 109] = "TFOOT";
726
- TAG_ID[TAG_ID["TD"] = 110] = "TD";
727
- TAG_ID[TAG_ID["TH"] = 111] = "TH";
728
- TAG_ID[TAG_ID["THEAD"] = 112] = "THEAD";
729
- TAG_ID[TAG_ID["TITLE"] = 113] = "TITLE";
730
- TAG_ID[TAG_ID["TR"] = 114] = "TR";
731
- TAG_ID[TAG_ID["TRACK"] = 115] = "TRACK";
732
- TAG_ID[TAG_ID["TT"] = 116] = "TT";
733
- TAG_ID[TAG_ID["U"] = 117] = "U";
734
- TAG_ID[TAG_ID["UL"] = 118] = "UL";
735
- TAG_ID[TAG_ID["SVG"] = 119] = "SVG";
736
- TAG_ID[TAG_ID["VAR"] = 120] = "VAR";
737
- TAG_ID[TAG_ID["WBR"] = 121] = "WBR";
738
- TAG_ID[TAG_ID["XMP"] = 122] = "XMP";
739
- })(TAG_ID = TAG_ID || (TAG_ID = {}));
1085
+ TAG_ID[TAG_ID["SEARCH"] = 94] = "SEARCH";
1086
+ TAG_ID[TAG_ID["SECTION"] = 95] = "SECTION";
1087
+ TAG_ID[TAG_ID["SELECT"] = 96] = "SELECT";
1088
+ TAG_ID[TAG_ID["SOURCE"] = 97] = "SOURCE";
1089
+ TAG_ID[TAG_ID["SMALL"] = 98] = "SMALL";
1090
+ TAG_ID[TAG_ID["SPAN"] = 99] = "SPAN";
1091
+ TAG_ID[TAG_ID["STRIKE"] = 100] = "STRIKE";
1092
+ TAG_ID[TAG_ID["STRONG"] = 101] = "STRONG";
1093
+ TAG_ID[TAG_ID["STYLE"] = 102] = "STYLE";
1094
+ TAG_ID[TAG_ID["SUB"] = 103] = "SUB";
1095
+ TAG_ID[TAG_ID["SUMMARY"] = 104] = "SUMMARY";
1096
+ TAG_ID[TAG_ID["SUP"] = 105] = "SUP";
1097
+ TAG_ID[TAG_ID["TABLE"] = 106] = "TABLE";
1098
+ TAG_ID[TAG_ID["TBODY"] = 107] = "TBODY";
1099
+ TAG_ID[TAG_ID["TEMPLATE"] = 108] = "TEMPLATE";
1100
+ TAG_ID[TAG_ID["TEXTAREA"] = 109] = "TEXTAREA";
1101
+ TAG_ID[TAG_ID["TFOOT"] = 110] = "TFOOT";
1102
+ TAG_ID[TAG_ID["TD"] = 111] = "TD";
1103
+ TAG_ID[TAG_ID["TH"] = 112] = "TH";
1104
+ TAG_ID[TAG_ID["THEAD"] = 113] = "THEAD";
1105
+ TAG_ID[TAG_ID["TITLE"] = 114] = "TITLE";
1106
+ TAG_ID[TAG_ID["TR"] = 115] = "TR";
1107
+ TAG_ID[TAG_ID["TRACK"] = 116] = "TRACK";
1108
+ TAG_ID[TAG_ID["TT"] = 117] = "TT";
1109
+ TAG_ID[TAG_ID["U"] = 118] = "U";
1110
+ TAG_ID[TAG_ID["UL"] = 119] = "UL";
1111
+ TAG_ID[TAG_ID["SVG"] = 120] = "SVG";
1112
+ TAG_ID[TAG_ID["VAR"] = 121] = "VAR";
1113
+ TAG_ID[TAG_ID["WBR"] = 122] = "WBR";
1114
+ TAG_ID[TAG_ID["XMP"] = 123] = "XMP";
1115
+ })(TAG_ID || (TAG_ID = {}));
740
1116
  const TAG_NAME_TO_ID = new Map([
741
1117
  [TAG_NAMES.A, TAG_ID.A],
742
1118
  [TAG_NAMES.ADDRESS, TAG_ID.ADDRESS],
@@ -831,6 +1207,7 @@ const TAG_NAME_TO_ID = new Map([
831
1207
  [TAG_NAMES.RUBY, TAG_ID.RUBY],
832
1208
  [TAG_NAMES.S, TAG_ID.S],
833
1209
  [TAG_NAMES.SCRIPT, TAG_ID.SCRIPT],
1210
+ [TAG_NAMES.SEARCH, TAG_ID.SEARCH],
834
1211
  [TAG_NAMES.SECTION, TAG_ID.SECTION],
835
1212
  [TAG_NAMES.SELECT, TAG_ID.SELECT],
836
1213
  [TAG_NAMES.SOURCE, TAG_ID.SOURCE],
@@ -956,40 +1333,8 @@ const SPECIAL_ELEMENTS = {
956
1333
  [NS.XML]: new Set(),
957
1334
  [NS.XMLNS]: new Set(),
958
1335
  };
959
- function isNumberedHeader(tn) {
960
- return tn === $.H1 || tn === $.H2 || tn === $.H3 || tn === $.H4 || tn === $.H5 || tn === $.H6;
961
- }
1336
+ const NUMBERED_HEADERS = new Set([$.H1, $.H2, $.H3, $.H4, $.H5, $.H6]);
962
1337
 
963
- //C1 Unicode control character reference replacements
964
- const C1_CONTROLS_REFERENCE_REPLACEMENTS = new Map([
965
- [0x80, 8364],
966
- [0x82, 8218],
967
- [0x83, 402],
968
- [0x84, 8222],
969
- [0x85, 8230],
970
- [0x86, 8224],
971
- [0x87, 8225],
972
- [0x88, 710],
973
- [0x89, 8240],
974
- [0x8a, 352],
975
- [0x8b, 8249],
976
- [0x8c, 338],
977
- [0x8e, 381],
978
- [0x91, 8216],
979
- [0x92, 8217],
980
- [0x93, 8220],
981
- [0x94, 8221],
982
- [0x95, 8226],
983
- [0x96, 8211],
984
- [0x97, 8212],
985
- [0x98, 732],
986
- [0x99, 8482],
987
- [0x9a, 353],
988
- [0x9b, 8250],
989
- [0x9c, 339],
990
- [0x9e, 382],
991
- [0x9f, 376],
992
- ]);
993
1338
  //States
994
1339
  var State;
995
1340
  (function (State) {
@@ -1065,13 +1410,7 @@ var State;
1065
1410
  State[State["CDATA_SECTION_BRACKET"] = 69] = "CDATA_SECTION_BRACKET";
1066
1411
  State[State["CDATA_SECTION_END"] = 70] = "CDATA_SECTION_END";
1067
1412
  State[State["CHARACTER_REFERENCE"] = 71] = "CHARACTER_REFERENCE";
1068
- State[State["NAMED_CHARACTER_REFERENCE"] = 72] = "NAMED_CHARACTER_REFERENCE";
1069
- State[State["AMBIGUOUS_AMPERSAND"] = 73] = "AMBIGUOUS_AMPERSAND";
1070
- State[State["NUMERIC_CHARACTER_REFERENCE"] = 74] = "NUMERIC_CHARACTER_REFERENCE";
1071
- State[State["HEXADEMICAL_CHARACTER_REFERENCE_START"] = 75] = "HEXADEMICAL_CHARACTER_REFERENCE_START";
1072
- State[State["HEXADEMICAL_CHARACTER_REFERENCE"] = 76] = "HEXADEMICAL_CHARACTER_REFERENCE";
1073
- State[State["DECIMAL_CHARACTER_REFERENCE"] = 77] = "DECIMAL_CHARACTER_REFERENCE";
1074
- State[State["NUMERIC_CHARACTER_REFERENCE_END"] = 78] = "NUMERIC_CHARACTER_REFERENCE_END";
1413
+ State[State["AMBIGUOUS_AMPERSAND"] = 72] = "AMBIGUOUS_AMPERSAND";
1075
1414
  })(State || (State = {}));
1076
1415
  //Tokenizer initial states for different modes
1077
1416
  const TokenizerMode = {
@@ -1101,27 +1440,33 @@ function isAsciiLetter(cp) {
1101
1440
  function isAsciiAlphaNumeric(cp) {
1102
1441
  return isAsciiLetter(cp) || isAsciiDigit(cp);
1103
1442
  }
1104
- function isAsciiUpperHexDigit(cp) {
1105
- return cp >= CODE_POINTS.LATIN_CAPITAL_A && cp <= CODE_POINTS.LATIN_CAPITAL_F;
1106
- }
1107
- function isAsciiLowerHexDigit(cp) {
1108
- return cp >= CODE_POINTS.LATIN_SMALL_A && cp <= CODE_POINTS.LATIN_SMALL_F;
1109
- }
1110
- function isAsciiHexDigit(cp) {
1111
- return isAsciiDigit(cp) || isAsciiUpperHexDigit(cp) || isAsciiLowerHexDigit(cp);
1112
- }
1113
1443
  function toAsciiLower(cp) {
1114
1444
  return cp + 32;
1115
1445
  }
1116
1446
  function isWhitespace(cp) {
1117
1447
  return cp === CODE_POINTS.SPACE || cp === CODE_POINTS.LINE_FEED || cp === CODE_POINTS.TABULATION || cp === CODE_POINTS.FORM_FEED;
1118
1448
  }
1119
- function isEntityInAttributeInvalidEnd(nextCp) {
1120
- return nextCp === CODE_POINTS.EQUALS_SIGN || isAsciiAlphaNumeric(nextCp);
1121
- }
1122
1449
  function isScriptDataDoubleEscapeSequenceEnd(cp) {
1123
1450
  return isWhitespace(cp) || cp === CODE_POINTS.SOLIDUS || cp === CODE_POINTS.GREATER_THAN_SIGN;
1124
1451
  }
1452
+ function getErrorForNumericCharacterReference(code) {
1453
+ if (code === CODE_POINTS.NULL) {
1454
+ return ERR.nullCharacterReference;
1455
+ }
1456
+ else if (code > 1114111) {
1457
+ return ERR.characterReferenceOutsideUnicodeRange;
1458
+ }
1459
+ else if (isSurrogate(code)) {
1460
+ return ERR.surrogateCharacterReference;
1461
+ }
1462
+ else if (isUndefinedCodePoint(code)) {
1463
+ return ERR.noncharacterCharacterReference;
1464
+ }
1465
+ else if (isControlCodePoint(code) || code === CODE_POINTS.CARRIAGE_RETURN) {
1466
+ return ERR.controlCharacterReference;
1467
+ }
1468
+ return null;
1469
+ }
1125
1470
  //Tokenizer
1126
1471
  class Tokenizer {
1127
1472
  constructor(options, handler) {
@@ -1141,18 +1486,38 @@ class Tokenizer {
1141
1486
  this.active = false;
1142
1487
  this.state = State.DATA;
1143
1488
  this.returnState = State.DATA;
1144
- this.charRefCode = -1;
1489
+ this.entityStartPos = 0;
1145
1490
  this.consumedAfterSnapshot = -1;
1146
1491
  this.currentCharacterToken = null;
1147
1492
  this.currentToken = null;
1148
1493
  this.currentAttr = { name: '', value: '' };
1149
1494
  this.preprocessor = new Preprocessor(handler);
1150
1495
  this.currentLocation = this.getCurrentLocation(-1);
1496
+ this.entityDecoder = new EntityDecoder(htmlDecodeTree, (cp, consumed) => {
1497
+ // Note: Set `pos` _before_ flushing, as flushing might drop
1498
+ // the current chunk and invalidate `entityStartPos`.
1499
+ this.preprocessor.pos = this.entityStartPos + consumed - 1;
1500
+ this._flushCodePointConsumedAsCharacterReference(cp);
1501
+ }, handler.onParseError
1502
+ ? {
1503
+ missingSemicolonAfterCharacterReference: () => {
1504
+ this._err(ERR.missingSemicolonAfterCharacterReference, 1);
1505
+ },
1506
+ absenceOfDigitsInNumericCharacterReference: (consumed) => {
1507
+ this._err(ERR.absenceOfDigitsInNumericCharacterReference, this.entityStartPos - this.preprocessor.pos + consumed);
1508
+ },
1509
+ validateNumericCharacterReference: (code) => {
1510
+ const error = getErrorForNumericCharacterReference(code);
1511
+ if (error)
1512
+ this._err(error, 1);
1513
+ },
1514
+ }
1515
+ : undefined);
1151
1516
  }
1152
1517
  //Errors
1153
- _err(code) {
1518
+ _err(code, cpOffset = 0) {
1154
1519
  var _a, _b;
1155
- (_b = (_a = this.handler).onParseError) === null || _b === void 0 ? void 0 : _b.call(_a, this.preprocessor.getError(code));
1520
+ (_b = (_a = this.handler).onParseError) === null || _b === void 0 ? void 0 : _b.call(_a, this.preprocessor.getError(code, cpOffset));
1156
1521
  }
1157
1522
  // NOTE: `offset` may never run across line boundaries.
1158
1523
  getCurrentLocation(offset) {
@@ -1214,7 +1579,8 @@ class Tokenizer {
1214
1579
  //Hibernation
1215
1580
  _ensureHibernation() {
1216
1581
  if (this.preprocessor.endOfChunkHit) {
1217
- this._unconsume(this.consumedAfterSnapshot);
1582
+ this.preprocessor.retreat(this.consumedAfterSnapshot);
1583
+ this.consumedAfterSnapshot = 0;
1218
1584
  this.active = false;
1219
1585
  return true;
1220
1586
  }
@@ -1225,14 +1591,6 @@ class Tokenizer {
1225
1591
  this.consumedAfterSnapshot++;
1226
1592
  return this.preprocessor.advance();
1227
1593
  }
1228
- _unconsume(count) {
1229
- this.consumedAfterSnapshot -= count;
1230
- this.preprocessor.retreat(count);
1231
- }
1232
- _reconsumeInState(state, cp) {
1233
- this.state = state;
1234
- this._callState(cp);
1235
- }
1236
1594
  _advanceBy(count) {
1237
1595
  this.consumedAfterSnapshot += count;
1238
1596
  for (let i = 0; i < count; i++) {
@@ -1404,7 +1762,7 @@ class Tokenizer {
1404
1762
  this.active = false;
1405
1763
  }
1406
1764
  //Characters emission
1407
- //OPTIMIZATION: specification uses only one type of character tokens (one token per character).
1765
+ //OPTIMIZATION: The specification uses only one type of character token (one token per character).
1408
1766
  //This causes a huge memory overhead and a lot of unnecessary parser loops. parse5 uses 3 groups of characters.
1409
1767
  //If we have a sequence of characters that belong to the same group, the parser can process it
1410
1768
  //as a single solid character token.
@@ -1414,15 +1772,15 @@ class Tokenizer {
1414
1772
  //3)TokenType.CHARACTER - any character sequence which don't belong to groups 1 and 2 (e.g. 'abcdef1234@@#$%^')
1415
1773
  _appendCharToCurrentCharacterToken(type, ch) {
1416
1774
  if (this.currentCharacterToken) {
1417
- if (this.currentCharacterToken.type !== type) {
1775
+ if (this.currentCharacterToken.type === type) {
1776
+ this.currentCharacterToken.chars += ch;
1777
+ return;
1778
+ }
1779
+ else {
1418
1780
  this.currentLocation = this.getCurrentLocation(0);
1419
1781
  this._emitCurrentCharacterToken(this.currentLocation);
1420
1782
  this.preprocessor.dropParsedChunk();
1421
1783
  }
1422
- else {
1423
- this.currentCharacterToken.chars += ch;
1424
- return;
1425
- }
1426
1784
  }
1427
1785
  this._createCharacterToken(type, ch);
1428
1786
  }
@@ -1440,59 +1798,11 @@ class Tokenizer {
1440
1798
  this._appendCharToCurrentCharacterToken(TokenType.CHARACTER, ch);
1441
1799
  }
1442
1800
  // Character reference helpers
1443
- _matchNamedCharacterReference(cp) {
1444
- let result = null;
1445
- let excess = 0;
1446
- let withoutSemicolon = false;
1447
- for (let i = 0, current = htmlDecodeTree[0]; i >= 0; cp = this._consume()) {
1448
- i = determineBranch(htmlDecodeTree, current, i + 1, cp);
1449
- if (i < 0)
1450
- break;
1451
- excess += 1;
1452
- current = htmlDecodeTree[i];
1453
- const masked = current & BinTrieFlags.VALUE_LENGTH;
1454
- // If the branch is a value, store it and continue
1455
- if (masked) {
1456
- // The mask is the number of bytes of the value, including the current byte.
1457
- const valueLength = (masked >> 14) - 1;
1458
- // Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error.
1459
- // See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
1460
- if (cp !== CODE_POINTS.SEMICOLON &&
1461
- this._isCharacterReferenceInAttribute() &&
1462
- isEntityInAttributeInvalidEnd(this.preprocessor.peek(1))) {
1463
- //NOTE: we don't flush all consumed code points here, and instead switch back to the original state after
1464
- //emitting an ampersand. This is fine, as alphanumeric characters won't be parsed differently in attributes.
1465
- result = [CODE_POINTS.AMPERSAND];
1466
- // Skip over the value.
1467
- i += valueLength;
1468
- }
1469
- else {
1470
- // If this is a surrogate pair, consume the next two bytes.
1471
- result =
1472
- valueLength === 0
1473
- ? [htmlDecodeTree[i] & ~BinTrieFlags.VALUE_LENGTH]
1474
- : valueLength === 1
1475
- ? [htmlDecodeTree[++i]]
1476
- : [htmlDecodeTree[++i], htmlDecodeTree[++i]];
1477
- excess = 0;
1478
- withoutSemicolon = cp !== CODE_POINTS.SEMICOLON;
1479
- }
1480
- if (valueLength === 0) {
1481
- // If the value is zero-length, we're done.
1482
- this._consume();
1483
- break;
1484
- }
1485
- }
1486
- }
1487
- this._unconsume(excess);
1488
- if (withoutSemicolon && !this.preprocessor.endOfChunkHit) {
1489
- this._err(ERR.missingSemicolonAfterCharacterReference);
1490
- }
1491
- // We want to emit the error above on the code point after the entity.
1492
- // We always consume one code point too many in the loop, and we wait to
1493
- // unconsume it until after the error is emitted.
1494
- this._unconsume(1);
1495
- return result;
1801
+ _startCharacterReference() {
1802
+ this.returnState = this.state;
1803
+ this.state = State.CHARACTER_REFERENCE;
1804
+ this.entityStartPos = this.preprocessor.pos;
1805
+ this.entityDecoder.startEntity(this._isCharacterReferenceInAttribute() ? DecodingMode.Attribute : DecodingMode.Legacy);
1496
1806
  }
1497
1807
  _isCharacterReferenceInAttribute() {
1498
1808
  return (this.returnState === State.ATTRIBUTE_VALUE_DOUBLE_QUOTED ||
@@ -1795,37 +2105,13 @@ class Tokenizer {
1795
2105
  break;
1796
2106
  }
1797
2107
  case State.CHARACTER_REFERENCE: {
1798
- this._stateCharacterReference(cp);
1799
- break;
1800
- }
1801
- case State.NAMED_CHARACTER_REFERENCE: {
1802
- this._stateNamedCharacterReference(cp);
2108
+ this._stateCharacterReference();
1803
2109
  break;
1804
2110
  }
1805
2111
  case State.AMBIGUOUS_AMPERSAND: {
1806
2112
  this._stateAmbiguousAmpersand(cp);
1807
2113
  break;
1808
2114
  }
1809
- case State.NUMERIC_CHARACTER_REFERENCE: {
1810
- this._stateNumericCharacterReference(cp);
1811
- break;
1812
- }
1813
- case State.HEXADEMICAL_CHARACTER_REFERENCE_START: {
1814
- this._stateHexademicalCharacterReferenceStart(cp);
1815
- break;
1816
- }
1817
- case State.HEXADEMICAL_CHARACTER_REFERENCE: {
1818
- this._stateHexademicalCharacterReference(cp);
1819
- break;
1820
- }
1821
- case State.DECIMAL_CHARACTER_REFERENCE: {
1822
- this._stateDecimalCharacterReference(cp);
1823
- break;
1824
- }
1825
- case State.NUMERIC_CHARACTER_REFERENCE_END: {
1826
- this._stateNumericCharacterReferenceEnd(cp);
1827
- break;
1828
- }
1829
2115
  default: {
1830
2116
  throw new Error('Unknown state');
1831
2117
  }
@@ -1841,8 +2127,7 @@ class Tokenizer {
1841
2127
  break;
1842
2128
  }
1843
2129
  case CODE_POINTS.AMPERSAND: {
1844
- this.returnState = State.DATA;
1845
- this.state = State.CHARACTER_REFERENCE;
2130
+ this._startCharacterReference();
1846
2131
  break;
1847
2132
  }
1848
2133
  case CODE_POINTS.NULL: {
@@ -1864,8 +2149,7 @@ class Tokenizer {
1864
2149
  _stateRcdata(cp) {
1865
2150
  switch (cp) {
1866
2151
  case CODE_POINTS.AMPERSAND: {
1867
- this.returnState = State.RCDATA;
1868
- this.state = State.CHARACTER_REFERENCE;
2152
+ this._startCharacterReference();
1869
2153
  break;
1870
2154
  }
1871
2155
  case CODE_POINTS.LESS_THAN_SIGN: {
@@ -2634,8 +2918,7 @@ class Tokenizer {
2634
2918
  break;
2635
2919
  }
2636
2920
  case CODE_POINTS.AMPERSAND: {
2637
- this.returnState = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
2638
- this.state = State.CHARACTER_REFERENCE;
2921
+ this._startCharacterReference();
2639
2922
  break;
2640
2923
  }
2641
2924
  case CODE_POINTS.NULL: {
@@ -2662,8 +2945,7 @@ class Tokenizer {
2662
2945
  break;
2663
2946
  }
2664
2947
  case CODE_POINTS.AMPERSAND: {
2665
- this.returnState = State.ATTRIBUTE_VALUE_SINGLE_QUOTED;
2666
- this.state = State.CHARACTER_REFERENCE;
2948
+ this._startCharacterReference();
2667
2949
  break;
2668
2950
  }
2669
2951
  case CODE_POINTS.NULL: {
@@ -2694,8 +2976,7 @@ class Tokenizer {
2694
2976
  break;
2695
2977
  }
2696
2978
  case CODE_POINTS.AMPERSAND: {
2697
- this.returnState = State.ATTRIBUTE_VALUE_UNQUOTED;
2698
- this.state = State.CHARACTER_REFERENCE;
2979
+ this._startCharacterReference();
2699
2980
  break;
2700
2981
  }
2701
2982
  case CODE_POINTS.GREATER_THAN_SIGN: {
@@ -3711,35 +3992,35 @@ class Tokenizer {
3711
3992
  }
3712
3993
  // Character reference state
3713
3994
  //------------------------------------------------------------------
3714
- _stateCharacterReference(cp) {
3715
- if (cp === CODE_POINTS.NUMBER_SIGN) {
3716
- this.state = State.NUMERIC_CHARACTER_REFERENCE;
3717
- }
3718
- else if (isAsciiAlphaNumeric(cp)) {
3719
- this.state = State.NAMED_CHARACTER_REFERENCE;
3720
- this._stateNamedCharacterReference(cp);
3995
+ _stateCharacterReference() {
3996
+ let length = this.entityDecoder.write(this.preprocessor.html, this.preprocessor.pos);
3997
+ if (length < 0) {
3998
+ if (this.preprocessor.lastChunkWritten) {
3999
+ length = this.entityDecoder.end();
4000
+ }
4001
+ else {
4002
+ // Wait for the rest of the entity.
4003
+ this.active = false;
4004
+ // Mark the entire buffer as read.
4005
+ this.preprocessor.pos = this.preprocessor.html.length - 1;
4006
+ this.consumedAfterSnapshot = 0;
4007
+ this.preprocessor.endOfChunkHit = true;
4008
+ return;
4009
+ }
3721
4010
  }
3722
- else {
4011
+ if (length === 0) {
4012
+ // This was not a valid entity. Go back to the beginning, and
4013
+ // figure out what to do.
4014
+ this.preprocessor.pos = this.entityStartPos;
3723
4015
  this._flushCodePointConsumedAsCharacterReference(CODE_POINTS.AMPERSAND);
3724
- this._reconsumeInState(this.returnState, cp);
3725
- }
3726
- }
3727
- // Named character reference state
3728
- //------------------------------------------------------------------
3729
- _stateNamedCharacterReference(cp) {
3730
- const matchResult = this._matchNamedCharacterReference(cp);
3731
- //NOTE: Matching can be abrupted by hibernation. In that case, match
3732
- //results are no longer valid and we will need to start over.
3733
- if (this._ensureHibernation()) ;
3734
- else if (matchResult) {
3735
- for (let i = 0; i < matchResult.length; i++) {
3736
- this._flushCodePointConsumedAsCharacterReference(matchResult[i]);
3737
- }
3738
- this.state = this.returnState;
4016
+ this.state =
4017
+ !this._isCharacterReferenceInAttribute() && isAsciiAlphaNumeric(this.preprocessor.peek(1))
4018
+ ? State.AMBIGUOUS_AMPERSAND
4019
+ : this.returnState;
3739
4020
  }
3740
4021
  else {
3741
- this._flushCodePointConsumedAsCharacterReference(CODE_POINTS.AMPERSAND);
3742
- this.state = State.AMBIGUOUS_AMPERSAND;
4022
+ // We successfully parsed an entity. Switch to the return state.
4023
+ this.state = this.returnState;
3743
4024
  }
3744
4025
  }
3745
4026
  // Ambiguos ampersand state
@@ -3752,107 +4033,10 @@ class Tokenizer {
3752
4033
  if (cp === CODE_POINTS.SEMICOLON) {
3753
4034
  this._err(ERR.unknownNamedCharacterReference);
3754
4035
  }
3755
- this._reconsumeInState(this.returnState, cp);
3756
- }
3757
- }
3758
- // Numeric character reference state
3759
- //------------------------------------------------------------------
3760
- _stateNumericCharacterReference(cp) {
3761
- this.charRefCode = 0;
3762
- if (cp === CODE_POINTS.LATIN_SMALL_X || cp === CODE_POINTS.LATIN_CAPITAL_X) {
3763
- this.state = State.HEXADEMICAL_CHARACTER_REFERENCE_START;
3764
- }
3765
- // Inlined decimal character reference start state
3766
- else if (isAsciiDigit(cp)) {
3767
- this.state = State.DECIMAL_CHARACTER_REFERENCE;
3768
- this._stateDecimalCharacterReference(cp);
3769
- }
3770
- else {
3771
- this._err(ERR.absenceOfDigitsInNumericCharacterReference);
3772
- this._flushCodePointConsumedAsCharacterReference(CODE_POINTS.AMPERSAND);
3773
- this._flushCodePointConsumedAsCharacterReference(CODE_POINTS.NUMBER_SIGN);
3774
- this._reconsumeInState(this.returnState, cp);
3775
- }
3776
- }
3777
- // Hexademical character reference start state
3778
- //------------------------------------------------------------------
3779
- _stateHexademicalCharacterReferenceStart(cp) {
3780
- if (isAsciiHexDigit(cp)) {
3781
- this.state = State.HEXADEMICAL_CHARACTER_REFERENCE;
3782
- this._stateHexademicalCharacterReference(cp);
3783
- }
3784
- else {
3785
- this._err(ERR.absenceOfDigitsInNumericCharacterReference);
3786
- this._flushCodePointConsumedAsCharacterReference(CODE_POINTS.AMPERSAND);
3787
- this._flushCodePointConsumedAsCharacterReference(CODE_POINTS.NUMBER_SIGN);
3788
- this._unconsume(2);
3789
4036
  this.state = this.returnState;
4037
+ this._callState(cp);
3790
4038
  }
3791
4039
  }
3792
- // Hexademical character reference state
3793
- //------------------------------------------------------------------
3794
- _stateHexademicalCharacterReference(cp) {
3795
- if (isAsciiUpperHexDigit(cp)) {
3796
- this.charRefCode = this.charRefCode * 16 + cp - 0x37;
3797
- }
3798
- else if (isAsciiLowerHexDigit(cp)) {
3799
- this.charRefCode = this.charRefCode * 16 + cp - 0x57;
3800
- }
3801
- else if (isAsciiDigit(cp)) {
3802
- this.charRefCode = this.charRefCode * 16 + cp - 0x30;
3803
- }
3804
- else if (cp === CODE_POINTS.SEMICOLON) {
3805
- this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
3806
- }
3807
- else {
3808
- this._err(ERR.missingSemicolonAfterCharacterReference);
3809
- this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
3810
- this._stateNumericCharacterReferenceEnd(cp);
3811
- }
3812
- }
3813
- // Decimal character reference state
3814
- //------------------------------------------------------------------
3815
- _stateDecimalCharacterReference(cp) {
3816
- if (isAsciiDigit(cp)) {
3817
- this.charRefCode = this.charRefCode * 10 + cp - 0x30;
3818
- }
3819
- else if (cp === CODE_POINTS.SEMICOLON) {
3820
- this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
3821
- }
3822
- else {
3823
- this._err(ERR.missingSemicolonAfterCharacterReference);
3824
- this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
3825
- this._stateNumericCharacterReferenceEnd(cp);
3826
- }
3827
- }
3828
- // Numeric character reference end state
3829
- //------------------------------------------------------------------
3830
- _stateNumericCharacterReferenceEnd(cp) {
3831
- if (this.charRefCode === CODE_POINTS.NULL) {
3832
- this._err(ERR.nullCharacterReference);
3833
- this.charRefCode = CODE_POINTS.REPLACEMENT_CHARACTER;
3834
- }
3835
- else if (this.charRefCode > 1114111) {
3836
- this._err(ERR.characterReferenceOutsideUnicodeRange);
3837
- this.charRefCode = CODE_POINTS.REPLACEMENT_CHARACTER;
3838
- }
3839
- else if (isSurrogate(this.charRefCode)) {
3840
- this._err(ERR.surrogateCharacterReference);
3841
- this.charRefCode = CODE_POINTS.REPLACEMENT_CHARACTER;
3842
- }
3843
- else if (isUndefinedCodePoint(this.charRefCode)) {
3844
- this._err(ERR.noncharacterCharacterReference);
3845
- }
3846
- else if (isControlCodePoint(this.charRefCode) || this.charRefCode === CODE_POINTS.CARRIAGE_RETURN) {
3847
- this._err(ERR.controlCharacterReference);
3848
- const replacement = C1_CONTROLS_REFERENCE_REPLACEMENTS.get(this.charRefCode);
3849
- if (replacement !== undefined) {
3850
- this.charRefCode = replacement;
3851
- }
3852
- }
3853
- this._flushCodePointConsumedAsCharacterReference(this.charRefCode);
3854
- this._reconsumeInState(this.returnState, cp);
3855
- }
3856
4040
  }
3857
4041
 
3858
4042
  //Element utils
@@ -3868,31 +4052,25 @@ const IMPLICIT_END_TAG_REQUIRED_THOROUGHLY = new Set([
3868
4052
  TAG_ID.THEAD,
3869
4053
  TAG_ID.TR,
3870
4054
  ]);
3871
- const SCOPING_ELEMENT_NS = new Map([
3872
- [TAG_ID.APPLET, NS.HTML],
3873
- [TAG_ID.CAPTION, NS.HTML],
3874
- [TAG_ID.HTML, NS.HTML],
3875
- [TAG_ID.MARQUEE, NS.HTML],
3876
- [TAG_ID.OBJECT, NS.HTML],
3877
- [TAG_ID.TABLE, NS.HTML],
3878
- [TAG_ID.TD, NS.HTML],
3879
- [TAG_ID.TEMPLATE, NS.HTML],
3880
- [TAG_ID.TH, NS.HTML],
3881
- [TAG_ID.ANNOTATION_XML, NS.MATHML],
3882
- [TAG_ID.MI, NS.MATHML],
3883
- [TAG_ID.MN, NS.MATHML],
3884
- [TAG_ID.MO, NS.MATHML],
3885
- [TAG_ID.MS, NS.MATHML],
3886
- [TAG_ID.MTEXT, NS.MATHML],
3887
- [TAG_ID.DESC, NS.SVG],
3888
- [TAG_ID.FOREIGN_OBJECT, NS.SVG],
3889
- [TAG_ID.TITLE, NS.SVG],
4055
+ const SCOPING_ELEMENTS_HTML = new Set([
4056
+ TAG_ID.APPLET,
4057
+ TAG_ID.CAPTION,
4058
+ TAG_ID.HTML,
4059
+ TAG_ID.MARQUEE,
4060
+ TAG_ID.OBJECT,
4061
+ TAG_ID.TABLE,
4062
+ TAG_ID.TD,
4063
+ TAG_ID.TEMPLATE,
4064
+ TAG_ID.TH,
3890
4065
  ]);
3891
- const NAMED_HEADERS = [TAG_ID.H1, TAG_ID.H2, TAG_ID.H3, TAG_ID.H4, TAG_ID.H5, TAG_ID.H6];
3892
- const TABLE_ROW_CONTEXT = [TAG_ID.TR, TAG_ID.TEMPLATE, TAG_ID.HTML];
3893
- const TABLE_BODY_CONTEXT = [TAG_ID.TBODY, TAG_ID.TFOOT, TAG_ID.THEAD, TAG_ID.TEMPLATE, TAG_ID.HTML];
3894
- const TABLE_CONTEXT = [TAG_ID.TABLE, TAG_ID.TEMPLATE, TAG_ID.HTML];
3895
- const TABLE_CELLS = [TAG_ID.TD, TAG_ID.TH];
4066
+ const SCOPING_ELEMENTS_HTML_LIST = new Set([...SCOPING_ELEMENTS_HTML, TAG_ID.OL, TAG_ID.UL]);
4067
+ const SCOPING_ELEMENTS_HTML_BUTTON = new Set([...SCOPING_ELEMENTS_HTML, TAG_ID.BUTTON]);
4068
+ const SCOPING_ELEMENTS_MATHML = new Set([TAG_ID.ANNOTATION_XML, TAG_ID.MI, TAG_ID.MN, TAG_ID.MO, TAG_ID.MS, TAG_ID.MTEXT]);
4069
+ const SCOPING_ELEMENTS_SVG = new Set([TAG_ID.DESC, TAG_ID.FOREIGN_OBJECT, TAG_ID.TITLE]);
4070
+ const TABLE_ROW_CONTEXT = new Set([TAG_ID.TR, TAG_ID.TEMPLATE, TAG_ID.HTML]);
4071
+ const TABLE_BODY_CONTEXT = new Set([TAG_ID.TBODY, TAG_ID.TFOOT, TAG_ID.THEAD, TAG_ID.TEMPLATE, TAG_ID.HTML]);
4072
+ const TABLE_CONTEXT = new Set([TAG_ID.TABLE, TAG_ID.TEMPLATE, TAG_ID.HTML]);
4073
+ const TABLE_CELLS = new Set([TAG_ID.TD, TAG_ID.TH]);
3896
4074
  //Stack of open elements
3897
4075
  class OpenElementStack {
3898
4076
  get currentTmplContentOrNode() {
@@ -3985,7 +4163,7 @@ class OpenElementStack {
3985
4163
  this.shortenToLength(idx < 0 ? 0 : idx);
3986
4164
  }
3987
4165
  popUntilNumberedHeaderPopped() {
3988
- this.popUntilPopped(NAMED_HEADERS, NS.HTML);
4166
+ this.popUntilPopped(NUMBERED_HEADERS, NS.HTML);
3989
4167
  }
3990
4168
  popUntilTableCellPopped() {
3991
4169
  this.popUntilPopped(TABLE_CELLS, NS.HTML);
@@ -3998,7 +4176,7 @@ class OpenElementStack {
3998
4176
  }
3999
4177
  _indexOfTagNames(tagNames, namespace) {
4000
4178
  for (let i = this.stackTop; i >= 0; i--) {
4001
- if (tagNames.includes(this.tagIDs[i]) && this.treeAdapter.getNamespaceURI(this.items[i]) === namespace) {
4179
+ if (tagNames.has(this.tagIDs[i]) && this.treeAdapter.getNamespaceURI(this.items[i]) === namespace) {
4002
4180
  return i;
4003
4181
  }
4004
4182
  }
@@ -4048,102 +4226,117 @@ class OpenElementStack {
4048
4226
  return this.stackTop === 0 && this.tagIDs[0] === TAG_ID.HTML;
4049
4227
  }
4050
4228
  //Element in scope
4051
- hasInScope(tagName) {
4229
+ hasInDynamicScope(tagName, htmlScope) {
4052
4230
  for (let i = this.stackTop; i >= 0; i--) {
4053
4231
  const tn = this.tagIDs[i];
4054
- const ns = this.treeAdapter.getNamespaceURI(this.items[i]);
4055
- if (tn === tagName && ns === NS.HTML) {
4056
- return true;
4057
- }
4058
- if (SCOPING_ELEMENT_NS.get(tn) === ns) {
4059
- return false;
4232
+ switch (this.treeAdapter.getNamespaceURI(this.items[i])) {
4233
+ case NS.HTML: {
4234
+ if (tn === tagName)
4235
+ return true;
4236
+ if (htmlScope.has(tn))
4237
+ return false;
4238
+ break;
4239
+ }
4240
+ case NS.SVG: {
4241
+ if (SCOPING_ELEMENTS_SVG.has(tn))
4242
+ return false;
4243
+ break;
4244
+ }
4245
+ case NS.MATHML: {
4246
+ if (SCOPING_ELEMENTS_MATHML.has(tn))
4247
+ return false;
4248
+ break;
4249
+ }
4060
4250
  }
4061
4251
  }
4062
4252
  return true;
4063
4253
  }
4064
- hasNumberedHeaderInScope() {
4065
- for (let i = this.stackTop; i >= 0; i--) {
4066
- const tn = this.tagIDs[i];
4067
- const ns = this.treeAdapter.getNamespaceURI(this.items[i]);
4068
- if (isNumberedHeader(tn) && ns === NS.HTML) {
4069
- return true;
4070
- }
4071
- if (SCOPING_ELEMENT_NS.get(tn) === ns) {
4072
- return false;
4073
- }
4074
- }
4075
- return true;
4254
+ hasInScope(tagName) {
4255
+ return this.hasInDynamicScope(tagName, SCOPING_ELEMENTS_HTML);
4076
4256
  }
4077
4257
  hasInListItemScope(tagName) {
4078
- for (let i = this.stackTop; i >= 0; i--) {
4079
- const tn = this.tagIDs[i];
4080
- const ns = this.treeAdapter.getNamespaceURI(this.items[i]);
4081
- if (tn === tagName && ns === NS.HTML) {
4082
- return true;
4083
- }
4084
- if (((tn === TAG_ID.UL || tn === TAG_ID.OL) && ns === NS.HTML) || SCOPING_ELEMENT_NS.get(tn) === ns) {
4085
- return false;
4086
- }
4087
- }
4088
- return true;
4258
+ return this.hasInDynamicScope(tagName, SCOPING_ELEMENTS_HTML_LIST);
4089
4259
  }
4090
4260
  hasInButtonScope(tagName) {
4261
+ return this.hasInDynamicScope(tagName, SCOPING_ELEMENTS_HTML_BUTTON);
4262
+ }
4263
+ hasNumberedHeaderInScope() {
4091
4264
  for (let i = this.stackTop; i >= 0; i--) {
4092
4265
  const tn = this.tagIDs[i];
4093
- const ns = this.treeAdapter.getNamespaceURI(this.items[i]);
4094
- if (tn === tagName && ns === NS.HTML) {
4095
- return true;
4096
- }
4097
- if ((tn === TAG_ID.BUTTON && ns === NS.HTML) || SCOPING_ELEMENT_NS.get(tn) === ns) {
4098
- return false;
4266
+ switch (this.treeAdapter.getNamespaceURI(this.items[i])) {
4267
+ case NS.HTML: {
4268
+ if (NUMBERED_HEADERS.has(tn))
4269
+ return true;
4270
+ if (SCOPING_ELEMENTS_HTML.has(tn))
4271
+ return false;
4272
+ break;
4273
+ }
4274
+ case NS.SVG: {
4275
+ if (SCOPING_ELEMENTS_SVG.has(tn))
4276
+ return false;
4277
+ break;
4278
+ }
4279
+ case NS.MATHML: {
4280
+ if (SCOPING_ELEMENTS_MATHML.has(tn))
4281
+ return false;
4282
+ break;
4283
+ }
4099
4284
  }
4100
4285
  }
4101
4286
  return true;
4102
4287
  }
4103
4288
  hasInTableScope(tagName) {
4104
4289
  for (let i = this.stackTop; i >= 0; i--) {
4105
- const tn = this.tagIDs[i];
4106
- const ns = this.treeAdapter.getNamespaceURI(this.items[i]);
4107
- if (ns !== NS.HTML) {
4290
+ if (this.treeAdapter.getNamespaceURI(this.items[i]) !== NS.HTML) {
4108
4291
  continue;
4109
4292
  }
4110
- if (tn === tagName) {
4111
- return true;
4112
- }
4113
- if (tn === TAG_ID.TABLE || tn === TAG_ID.TEMPLATE || tn === TAG_ID.HTML) {
4114
- return false;
4293
+ switch (this.tagIDs[i]) {
4294
+ case tagName: {
4295
+ return true;
4296
+ }
4297
+ case TAG_ID.TABLE:
4298
+ case TAG_ID.HTML: {
4299
+ return false;
4300
+ }
4115
4301
  }
4116
4302
  }
4117
4303
  return true;
4118
4304
  }
4119
4305
  hasTableBodyContextInTableScope() {
4120
4306
  for (let i = this.stackTop; i >= 0; i--) {
4121
- const tn = this.tagIDs[i];
4122
- const ns = this.treeAdapter.getNamespaceURI(this.items[i]);
4123
- if (ns !== NS.HTML) {
4307
+ if (this.treeAdapter.getNamespaceURI(this.items[i]) !== NS.HTML) {
4124
4308
  continue;
4125
4309
  }
4126
- if (tn === TAG_ID.TBODY || tn === TAG_ID.THEAD || tn === TAG_ID.TFOOT) {
4127
- return true;
4128
- }
4129
- if (tn === TAG_ID.TABLE || tn === TAG_ID.HTML) {
4130
- return false;
4310
+ switch (this.tagIDs[i]) {
4311
+ case TAG_ID.TBODY:
4312
+ case TAG_ID.THEAD:
4313
+ case TAG_ID.TFOOT: {
4314
+ return true;
4315
+ }
4316
+ case TAG_ID.TABLE:
4317
+ case TAG_ID.HTML: {
4318
+ return false;
4319
+ }
4131
4320
  }
4132
4321
  }
4133
4322
  return true;
4134
4323
  }
4135
4324
  hasInSelectScope(tagName) {
4136
4325
  for (let i = this.stackTop; i >= 0; i--) {
4137
- const tn = this.tagIDs[i];
4138
- const ns = this.treeAdapter.getNamespaceURI(this.items[i]);
4139
- if (ns !== NS.HTML) {
4326
+ if (this.treeAdapter.getNamespaceURI(this.items[i]) !== NS.HTML) {
4140
4327
  continue;
4141
4328
  }
4142
- if (tn === tagName) {
4143
- return true;
4144
- }
4145
- if (tn !== TAG_ID.OPTION && tn !== TAG_ID.OPTGROUP) {
4146
- return false;
4329
+ switch (this.tagIDs[i]) {
4330
+ case tagName: {
4331
+ return true;
4332
+ }
4333
+ case TAG_ID.OPTION:
4334
+ case TAG_ID.OPTGROUP: {
4335
+ break;
4336
+ }
4337
+ default: {
4338
+ return false;
4339
+ }
4147
4340
  }
4148
4341
  }
4149
4342
  return true;
@@ -4172,7 +4365,7 @@ var EntryType;
4172
4365
  (function (EntryType) {
4173
4366
  EntryType[EntryType["Marker"] = 0] = "Marker";
4174
4367
  EntryType[EntryType["Element"] = 1] = "Element";
4175
- })(EntryType = EntryType || (EntryType = {}));
4368
+ })(EntryType || (EntryType = {}));
4176
4369
  const MARKER = { type: EntryType.Marker };
4177
4370
  //List of formatting elements
4178
4371
  class FormattingElementList {
@@ -4277,13 +4470,6 @@ class FormattingElementList {
4277
4470
  }
4278
4471
  }
4279
4472
 
4280
- function createTextNode(value) {
4281
- return {
4282
- nodeName: '#text',
4283
- value,
4284
- parentNode: null,
4285
- };
4286
- }
4287
4473
  const defaultTreeAdapter = {
4288
4474
  //Node construction
4289
4475
  createDocument() {
@@ -4316,6 +4502,13 @@ const defaultTreeAdapter = {
4316
4502
  parentNode: null,
4317
4503
  };
4318
4504
  },
4505
+ createTextNode(value) {
4506
+ return {
4507
+ nodeName: '#text',
4508
+ value,
4509
+ parentNode: null,
4510
+ };
4511
+ },
4319
4512
  //Tree mutation
4320
4513
  appendChild(parentNode, newNode) {
4321
4514
  parentNode.childNodes.push(newNode);
@@ -4371,7 +4564,7 @@ const defaultTreeAdapter = {
4371
4564
  return;
4372
4565
  }
4373
4566
  }
4374
- defaultTreeAdapter.appendChild(parentNode, createTextNode(text));
4567
+ defaultTreeAdapter.appendChild(parentNode, defaultTreeAdapter.createTextNode(text));
4375
4568
  },
4376
4569
  insertTextBefore(parentNode, text, referenceNode) {
4377
4570
  const prevNode = parentNode.childNodes[parentNode.childNodes.indexOf(referenceNode) - 1];
@@ -4379,7 +4572,7 @@ const defaultTreeAdapter = {
4379
4572
  prevNode.value += text;
4380
4573
  }
4381
4574
  else {
4382
- defaultTreeAdapter.insertBefore(parentNode, createTextNode(text), referenceNode);
4575
+ defaultTreeAdapter.insertBefore(parentNode, defaultTreeAdapter.createTextNode(text), referenceNode);
4383
4576
  }
4384
4577
  },
4385
4578
  adoptAttributes(recipient, attrs) {
@@ -4640,7 +4833,6 @@ const XML_ATTRS_ADJUSTMENT_MAP = new Map([
4640
4833
  ['xlink:show', { prefix: 'xlink', name: 'show', namespace: NS.XLINK }],
4641
4834
  ['xlink:title', { prefix: 'xlink', name: 'title', namespace: NS.XLINK }],
4642
4835
  ['xlink:type', { prefix: 'xlink', name: 'type', namespace: NS.XLINK }],
4643
- ['xml:base', { prefix: 'xml', name: 'base', namespace: NS.XML }],
4644
4836
  ['xml:lang', { prefix: 'xml', name: 'lang', namespace: NS.XML }],
4645
4837
  ['xml:space', { prefix: 'xml', name: 'space', namespace: NS.XML }],
4646
4838
  ['xmlns', { prefix: '', name: 'xmlns', namespace: NS.XMLNS }],
@@ -4842,26 +5034,41 @@ const defaultParserOptions = {
4842
5034
  };
4843
5035
  //Parser
4844
5036
  class Parser {
4845
- constructor(options, document, fragmentContext = null, scriptHandler = null) {
5037
+ constructor(options, document,
5038
+ /** @internal */
5039
+ fragmentContext = null,
5040
+ /** @internal */
5041
+ scriptHandler = null) {
4846
5042
  this.fragmentContext = fragmentContext;
4847
5043
  this.scriptHandler = scriptHandler;
4848
5044
  this.currentToken = null;
4849
5045
  this.stopped = false;
5046
+ /** @internal */
4850
5047
  this.insertionMode = InsertionMode.INITIAL;
5048
+ /** @internal */
4851
5049
  this.originalInsertionMode = InsertionMode.INITIAL;
5050
+ /** @internal */
4852
5051
  this.headElement = null;
5052
+ /** @internal */
4853
5053
  this.formElement = null;
4854
5054
  /** Indicates that the current node is not an element in the HTML namespace */
4855
5055
  this.currentNotInHTML = false;
4856
5056
  /**
4857
5057
  * The template insertion mode stack is maintained from the left.
4858
5058
  * Ie. the topmost element will always have index 0.
5059
+ *
5060
+ * @internal
4859
5061
  */
4860
5062
  this.tmplInsertionModeStack = [];
5063
+ /** @internal */
4861
5064
  this.pendingCharacterTokens = [];
5065
+ /** @internal */
4862
5066
  this.hasNonWhitespacePendingCharacterToken = false;
5067
+ /** @internal */
4863
5068
  this.framesetOk = true;
5069
+ /** @internal */
4864
5070
  this.skipNextNewLine = false;
5071
+ /** @internal */
4865
5072
  this.fosterParentingEnabled = false;
4866
5073
  this.options = {
4867
5074
  ...defaultParserOptions,
@@ -4915,6 +5122,7 @@ class Parser {
4915
5122
  return fragment;
4916
5123
  }
4917
5124
  //Errors
5125
+ /** @internal */
4918
5126
  _err(token, code, beforeToken) {
4919
5127
  var _a;
4920
5128
  if (!this.onParseError)
@@ -4932,12 +5140,14 @@ class Parser {
4932
5140
  this.onParseError(err);
4933
5141
  }
4934
5142
  //Stack events
5143
+ /** @internal */
4935
5144
  onItemPush(node, tid, isTop) {
4936
5145
  var _a, _b;
4937
5146
  (_b = (_a = this.treeAdapter).onItemPush) === null || _b === void 0 ? void 0 : _b.call(_a, node);
4938
5147
  if (isTop && this.openElements.stackTop > 0)
4939
5148
  this._setContextModes(node, tid);
4940
5149
  }
5150
+ /** @internal */
4941
5151
  onItemPop(node, isTop) {
4942
5152
  var _a, _b;
4943
5153
  if (this.options.sourceCodeLocationInfo) {
@@ -4962,6 +5172,7 @@ class Parser {
4962
5172
  this.currentNotInHTML = !isHTML;
4963
5173
  this.tokenizer.inForeignNode = !isHTML && !this._isIntegrationPoint(tid, current);
4964
5174
  }
5175
+ /** @protected */
4965
5176
  _switchToTextParsing(currentToken, nextTokenizerState) {
4966
5177
  this._insertElement(currentToken, NS.HTML);
4967
5178
  this.tokenizer.state = nextTokenizerState;
@@ -4974,11 +5185,13 @@ class Parser {
4974
5185
  this.tokenizer.state = TokenizerMode.PLAINTEXT;
4975
5186
  }
4976
5187
  //Fragment parsing
5188
+ /** @protected */
4977
5189
  _getAdjustedCurrentElement() {
4978
5190
  return this.openElements.stackTop === 0 && this.fragmentContext
4979
5191
  ? this.fragmentContext
4980
5192
  : this.openElements.current;
4981
5193
  }
5194
+ /** @protected */
4982
5195
  _findFormInFragmentContext() {
4983
5196
  let node = this.fragmentContext;
4984
5197
  while (node) {
@@ -5020,6 +5233,7 @@ class Parser {
5020
5233
  }
5021
5234
  }
5022
5235
  //Tree mutation
5236
+ /** @protected */
5023
5237
  _setDocumentType(token) {
5024
5238
  const name = token.name || '';
5025
5239
  const publicId = token.publicId || '';
@@ -5033,6 +5247,7 @@ class Parser {
5033
5247
  }
5034
5248
  }
5035
5249
  }
5250
+ /** @protected */
5036
5251
  _attachElementToTree(element, location) {
5037
5252
  if (this.options.sourceCodeLocationInfo) {
5038
5253
  const loc = location && {
@@ -5049,20 +5264,28 @@ class Parser {
5049
5264
  this.treeAdapter.appendChild(parent, element);
5050
5265
  }
5051
5266
  }
5267
+ /**
5268
+ * For self-closing tags. Add an element to the tree, but skip adding it
5269
+ * to the stack.
5270
+ */
5271
+ /** @protected */
5052
5272
  _appendElement(token, namespaceURI) {
5053
5273
  const element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs);
5054
5274
  this._attachElementToTree(element, token.location);
5055
5275
  }
5276
+ /** @protected */
5056
5277
  _insertElement(token, namespaceURI) {
5057
5278
  const element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs);
5058
5279
  this._attachElementToTree(element, token.location);
5059
5280
  this.openElements.push(element, token.tagID);
5060
5281
  }
5282
+ /** @protected */
5061
5283
  _insertFakeElement(tagName, tagID) {
5062
5284
  const element = this.treeAdapter.createElement(tagName, NS.HTML, []);
5063
5285
  this._attachElementToTree(element, null);
5064
5286
  this.openElements.push(element, tagID);
5065
5287
  }
5288
+ /** @protected */
5066
5289
  _insertTemplate(token) {
5067
5290
  const tmpl = this.treeAdapter.createElement(token.tagName, NS.HTML, token.attrs);
5068
5291
  const content = this.treeAdapter.createDocumentFragment();
@@ -5072,6 +5295,7 @@ class Parser {
5072
5295
  if (this.options.sourceCodeLocationInfo)
5073
5296
  this.treeAdapter.setNodeSourceCodeLocation(content, null);
5074
5297
  }
5298
+ /** @protected */
5075
5299
  _insertFakeRootElement() {
5076
5300
  const element = this.treeAdapter.createElement(TAG_NAMES.HTML, NS.HTML, []);
5077
5301
  if (this.options.sourceCodeLocationInfo)
@@ -5079,6 +5303,7 @@ class Parser {
5079
5303
  this.treeAdapter.appendChild(this.openElements.current, element);
5080
5304
  this.openElements.push(element, TAG_ID.HTML);
5081
5305
  }
5306
+ /** @protected */
5082
5307
  _appendCommentNode(token, parent) {
5083
5308
  const commentNode = this.treeAdapter.createCommentNode(token.data);
5084
5309
  this.treeAdapter.appendChild(parent, commentNode);
@@ -5086,6 +5311,7 @@ class Parser {
5086
5311
  this.treeAdapter.setNodeSourceCodeLocation(commentNode, token.location);
5087
5312
  }
5088
5313
  }
5314
+ /** @protected */
5089
5315
  _insertCharacters(token) {
5090
5316
  let parent;
5091
5317
  let beforeElement;
@@ -5117,12 +5343,14 @@ class Parser {
5117
5343
  this.treeAdapter.setNodeSourceCodeLocation(textNode, token.location);
5118
5344
  }
5119
5345
  }
5346
+ /** @protected */
5120
5347
  _adoptNodes(donor, recipient) {
5121
5348
  for (let child = this.treeAdapter.getFirstChild(donor); child; child = this.treeAdapter.getFirstChild(donor)) {
5122
5349
  this.treeAdapter.detachNode(child);
5123
5350
  this.treeAdapter.appendChild(recipient, child);
5124
5351
  }
5125
5352
  }
5353
+ /** @protected */
5126
5354
  _setEndLocation(element, closingToken) {
5127
5355
  if (this.treeAdapter.getNodeSourceCodeLocation(element) && closingToken.location) {
5128
5356
  const ctLoc = closingToken.location;
@@ -5172,6 +5400,7 @@ class Parser {
5172
5400
  ((token.tagID === TAG_ID.MGLYPH || token.tagID === TAG_ID.MALIGNMARK) &&
5173
5401
  !this._isIntegrationPoint(currentTagId, current, NS.HTML)));
5174
5402
  }
5403
+ /** @protected */
5175
5404
  _processToken(token) {
5176
5405
  switch (token.type) {
5177
5406
  case TokenType.CHARACTER: {
@@ -5209,12 +5438,14 @@ class Parser {
5209
5438
  }
5210
5439
  }
5211
5440
  //Integration points
5441
+ /** @protected */
5212
5442
  _isIntegrationPoint(tid, element, foreignNS) {
5213
5443
  const ns = this.treeAdapter.getNamespaceURI(element);
5214
5444
  const attrs = this.treeAdapter.getAttrList(element);
5215
5445
  return isIntegrationPoint(tid, ns, attrs, foreignNS);
5216
5446
  }
5217
5447
  //Active formatting elements reconstruction
5448
+ /** @protected */
5218
5449
  _reconstructActiveFormattingElements() {
5219
5450
  const listLength = this.activeFormattingElements.entries.length;
5220
5451
  if (listLength) {
@@ -5228,17 +5459,20 @@ class Parser {
5228
5459
  }
5229
5460
  }
5230
5461
  //Close elements
5462
+ /** @protected */
5231
5463
  _closeTableCell() {
5232
5464
  this.openElements.generateImpliedEndTags();
5233
5465
  this.openElements.popUntilTableCellPopped();
5234
5466
  this.activeFormattingElements.clearToLastMarker();
5235
5467
  this.insertionMode = InsertionMode.IN_ROW;
5236
5468
  }
5469
+ /** @protected */
5237
5470
  _closePElement() {
5238
5471
  this.openElements.generateImpliedEndTagsWithExclusion(TAG_ID.P);
5239
5472
  this.openElements.popUntilTagNamePopped(TAG_ID.P);
5240
5473
  }
5241
5474
  //Insertion modes
5475
+ /** @protected */
5242
5476
  _resetInsertionMode() {
5243
5477
  for (let i = this.openElements.stackTop; i >= 0; i--) {
5244
5478
  //Insertion mode reset map
@@ -5304,6 +5538,7 @@ class Parser {
5304
5538
  }
5305
5539
  this.insertionMode = InsertionMode.IN_BODY;
5306
5540
  }
5541
+ /** @protected */
5307
5542
  _resetInsertionModeForSelect(selectIdx) {
5308
5543
  if (selectIdx > 0) {
5309
5544
  for (let i = selectIdx - 1; i > 0; i--) {
@@ -5320,12 +5555,15 @@ class Parser {
5320
5555
  this.insertionMode = InsertionMode.IN_SELECT;
5321
5556
  }
5322
5557
  //Foster parenting
5558
+ /** @protected */
5323
5559
  _isElementCausesFosterParenting(tn) {
5324
5560
  return TABLE_STRUCTURE_TAGS.has(tn);
5325
5561
  }
5562
+ /** @protected */
5326
5563
  _shouldFosterParentOnInsertion() {
5327
5564
  return this.fosterParentingEnabled && this._isElementCausesFosterParenting(this.openElements.currentTagId);
5328
5565
  }
5566
+ /** @protected */
5329
5567
  _findFosterParentingLocation() {
5330
5568
  for (let i = this.openElements.stackTop; i >= 0; i--) {
5331
5569
  const openElement = this.openElements.items[i];
@@ -5348,6 +5586,7 @@ class Parser {
5348
5586
  }
5349
5587
  return { parent: this.openElements.items[0], beforeElement: null };
5350
5588
  }
5589
+ /** @protected */
5351
5590
  _fosterParentElement(element) {
5352
5591
  const location = this._findFosterParentingLocation();
5353
5592
  if (location.beforeElement) {
@@ -5358,10 +5597,12 @@ class Parser {
5358
5597
  }
5359
5598
  }
5360
5599
  //Special elements
5600
+ /** @protected */
5361
5601
  _isSpecialElement(element, id) {
5362
5602
  const ns = this.treeAdapter.getNamespaceURI(element);
5363
5603
  return SPECIAL_ELEMENTS[ns].has(id);
5364
5604
  }
5605
+ /** @internal */
5365
5606
  onCharacter(token) {
5366
5607
  this.skipNextNewLine = false;
5367
5608
  if (this.tokenizer.inForeignNode) {
@@ -5431,6 +5672,7 @@ class Parser {
5431
5672
  // Do nothing
5432
5673
  }
5433
5674
  }
5675
+ /** @internal */
5434
5676
  onNullCharacter(token) {
5435
5677
  this.skipNextNewLine = false;
5436
5678
  if (this.tokenizer.inForeignNode) {
@@ -5487,6 +5729,7 @@ class Parser {
5487
5729
  // Do nothing
5488
5730
  }
5489
5731
  }
5732
+ /** @internal */
5490
5733
  onComment(token) {
5491
5734
  this.skipNextNewLine = false;
5492
5735
  if (this.currentNotInHTML) {
@@ -5531,6 +5774,7 @@ class Parser {
5531
5774
  // Do nothing
5532
5775
  }
5533
5776
  }
5777
+ /** @internal */
5534
5778
  onDoctype(token) {
5535
5779
  this.skipNextNewLine = false;
5536
5780
  switch (this.insertionMode) {
@@ -5552,6 +5796,7 @@ class Parser {
5552
5796
  // Do nothing
5553
5797
  }
5554
5798
  }
5799
+ /** @internal */
5555
5800
  onStartTag(token) {
5556
5801
  this.skipNextNewLine = false;
5557
5802
  this.currentToken = token;
@@ -5569,6 +5814,7 @@ class Parser {
5569
5814
  * for nested calls.
5570
5815
  *
5571
5816
  * @param token The token to process.
5817
+ * @protected
5572
5818
  */
5573
5819
  _processStartTag(token) {
5574
5820
  if (this.shouldProcessStartTagTokenInForeignContent(token)) {
@@ -5578,6 +5824,7 @@ class Parser {
5578
5824
  this._startTagOutsideForeignContent(token);
5579
5825
  }
5580
5826
  }
5827
+ /** @protected */
5581
5828
  _startTagOutsideForeignContent(token) {
5582
5829
  switch (this.insertionMode) {
5583
5830
  case InsertionMode.INITIAL: {
@@ -5671,6 +5918,7 @@ class Parser {
5671
5918
  // Do nothing
5672
5919
  }
5673
5920
  }
5921
+ /** @internal */
5674
5922
  onEndTag(token) {
5675
5923
  this.skipNextNewLine = false;
5676
5924
  this.currentToken = token;
@@ -5681,6 +5929,7 @@ class Parser {
5681
5929
  this._endTagOutsideForeignContent(token);
5682
5930
  }
5683
5931
  }
5932
+ /** @protected */
5684
5933
  _endTagOutsideForeignContent(token) {
5685
5934
  switch (this.insertionMode) {
5686
5935
  case InsertionMode.INITIAL: {
@@ -5774,6 +6023,7 @@ class Parser {
5774
6023
  // Do nothing
5775
6024
  }
5776
6025
  }
6026
+ /** @internal */
5777
6027
  onEof(token) {
5778
6028
  switch (this.insertionMode) {
5779
6029
  case InsertionMode.INITIAL: {
@@ -5835,6 +6085,7 @@ class Parser {
5835
6085
  // Do nothing
5836
6086
  }
5837
6087
  }
6088
+ /** @internal */
5838
6089
  onWhitespaceCharacter(token) {
5839
6090
  if (this.skipNextNewLine) {
5840
6091
  this.skipNextNewLine = false;
@@ -6405,7 +6656,7 @@ function numberedHeaderStartTagInBody(p, token) {
6405
6656
  if (p.openElements.hasInButtonScope(TAG_ID.P)) {
6406
6657
  p._closePElement();
6407
6658
  }
6408
- if (isNumberedHeader(p.openElements.currentTagId)) {
6659
+ if (NUMBERED_HEADERS.has(p.openElements.currentTagId)) {
6409
6660
  p.openElements.pop();
6410
6661
  }
6411
6662
  p._insertElement(token, NS.HTML);
@@ -6567,9 +6818,9 @@ function iframeStartTagInBody(p, token) {
6567
6818
  p.framesetOk = false;
6568
6819
  p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
6569
6820
  }
6570
- //NOTE: here we assume that we always act as an user agent with enabled plugins, so we parse
6571
- //<noembed> as rawtext.
6572
- function noembedStartTagInBody(p, token) {
6821
+ //NOTE: here we assume that we always act as a user agent with enabled plugins/frames, so we parse
6822
+ //<noembed>/<noframes> as rawtext.
6823
+ function rawTextStartTagInBody(p, token) {
6573
6824
  p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
6574
6825
  }
6575
6826
  function selectStartTagInBody(p, token) {
@@ -6681,6 +6932,7 @@ function startTagInBody(p, token) {
6681
6932
  case TAG_ID.DETAILS:
6682
6933
  case TAG_ID.ADDRESS:
6683
6934
  case TAG_ID.ARTICLE:
6935
+ case TAG_ID.SEARCH:
6684
6936
  case TAG_ID.SECTION:
6685
6937
  case TAG_ID.SUMMARY:
6686
6938
  case TAG_ID.FIELDSET:
@@ -6804,8 +7056,9 @@ function startTagInBody(p, token) {
6804
7056
  optgroupStartTagInBody(p, token);
6805
7057
  break;
6806
7058
  }
6807
- case TAG_ID.NOEMBED: {
6808
- noembedStartTagInBody(p, token);
7059
+ case TAG_ID.NOEMBED:
7060
+ case TAG_ID.NOFRAMES: {
7061
+ rawTextStartTagInBody(p, token);
6809
7062
  break;
6810
7063
  }
6811
7064
  case TAG_ID.FRAMESET: {
@@ -6818,7 +7071,7 @@ function startTagInBody(p, token) {
6818
7071
  }
6819
7072
  case TAG_ID.NOSCRIPT: {
6820
7073
  if (p.options.scriptingEnabled) {
6821
- noembedStartTagInBody(p, token);
7074
+ rawTextStartTagInBody(p, token);
6822
7075
  }
6823
7076
  else {
6824
7077
  genericStartTagInBody(p, token);
@@ -6990,6 +7243,7 @@ function endTagInBody(p, token) {
6990
7243
  case TAG_ID.ADDRESS:
6991
7244
  case TAG_ID.ARTICLE:
6992
7245
  case TAG_ID.DETAILS:
7246
+ case TAG_ID.SEARCH:
6993
7247
  case TAG_ID.SECTION:
6994
7248
  case TAG_ID.SUMMARY:
6995
7249
  case TAG_ID.LISTING:
@@ -7590,6 +7844,17 @@ function startTagInSelect(p, token) {
7590
7844
  p._insertElement(token, NS.HTML);
7591
7845
  break;
7592
7846
  }
7847
+ case TAG_ID.HR: {
7848
+ if (p.openElements.currentTagId === TAG_ID.OPTION) {
7849
+ p.openElements.pop();
7850
+ }
7851
+ if (p.openElements.currentTagId === TAG_ID.OPTGROUP) {
7852
+ p.openElements.pop();
7853
+ }
7854
+ p._appendElement(token, NS.HTML);
7855
+ token.ackSelfClosing = true;
7856
+ break;
7857
+ }
7593
7858
  case TAG_ID.INPUT:
7594
7859
  case TAG_ID.KEYGEN:
7595
7860
  case TAG_ID.TEXTAREA: