vite 6.0.0-beta.3 → 6.0.0-beta.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +52 -736
- package/bin/vite.js +1 -0
- package/dist/client/client.mjs +1 -1
- package/dist/node/chunks/{dep-DHwgfHPT.js → dep-BkDduZ8N.js} +9 -13
- package/dist/node/chunks/{dep-D-7KCb9p.js → dep-CdoEeCn3.js} +704 -439
- package/dist/node/chunks/{dep-ChZnDG_O.js → dep-Cpgpmu8-.js} +24796 -25355
- package/dist/node/chunks/{dep-CBTZ9M2V.js → dep-Cq6WeEUK.js} +4 -8
- package/dist/node/chunks/{dep-wWOLM6NS.js → dep-mtw2NpNs.js} +0 -4
- package/dist/node/cli.js +19 -33
- package/dist/node/index.d.ts +20 -6
- package/dist/node/index.js +5 -4
- package/dist/node/module-runner.js +1 -1
- package/dist/node-cjs/publicUtils.cjs +78 -92
- package/package.json +14 -14
@@ -1,9 +1,5 @@
|
|
1
|
-
import { fileURLToPath as __cjs_fileURLToPath } from 'node:url';
|
2
|
-
import { dirname as __cjs_dirname } from 'node:path';
|
3
1
|
import { createRequire as __cjs_createRequire } from 'node:module';
|
4
2
|
|
5
|
-
const __filename = __cjs_fileURLToPath(import.meta.url);
|
6
|
-
const __dirname = __cjs_dirname(__filename);
|
7
3
|
const require = __cjs_createRequire(import.meta.url);
|
8
4
|
const __require = require;
|
9
5
|
const UNDEFINED_CODE_POINTS = new Set([
|
@@ -24,7 +20,6 @@ var CODE_POINTS;
|
|
24
20
|
CODE_POINTS[CODE_POINTS["SPACE"] = 32] = "SPACE";
|
25
21
|
CODE_POINTS[CODE_POINTS["EXCLAMATION_MARK"] = 33] = "EXCLAMATION_MARK";
|
26
22
|
CODE_POINTS[CODE_POINTS["QUOTATION_MARK"] = 34] = "QUOTATION_MARK";
|
27
|
-
CODE_POINTS[CODE_POINTS["NUMBER_SIGN"] = 35] = "NUMBER_SIGN";
|
28
23
|
CODE_POINTS[CODE_POINTS["AMPERSAND"] = 38] = "AMPERSAND";
|
29
24
|
CODE_POINTS[CODE_POINTS["APOSTROPHE"] = 39] = "APOSTROPHE";
|
30
25
|
CODE_POINTS[CODE_POINTS["HYPHEN_MINUS"] = 45] = "HYPHEN_MINUS";
|
@@ -37,17 +32,12 @@ var CODE_POINTS;
|
|
37
32
|
CODE_POINTS[CODE_POINTS["GREATER_THAN_SIGN"] = 62] = "GREATER_THAN_SIGN";
|
38
33
|
CODE_POINTS[CODE_POINTS["QUESTION_MARK"] = 63] = "QUESTION_MARK";
|
39
34
|
CODE_POINTS[CODE_POINTS["LATIN_CAPITAL_A"] = 65] = "LATIN_CAPITAL_A";
|
40
|
-
CODE_POINTS[CODE_POINTS["LATIN_CAPITAL_F"] = 70] = "LATIN_CAPITAL_F";
|
41
|
-
CODE_POINTS[CODE_POINTS["LATIN_CAPITAL_X"] = 88] = "LATIN_CAPITAL_X";
|
42
35
|
CODE_POINTS[CODE_POINTS["LATIN_CAPITAL_Z"] = 90] = "LATIN_CAPITAL_Z";
|
43
36
|
CODE_POINTS[CODE_POINTS["RIGHT_SQUARE_BRACKET"] = 93] = "RIGHT_SQUARE_BRACKET";
|
44
37
|
CODE_POINTS[CODE_POINTS["GRAVE_ACCENT"] = 96] = "GRAVE_ACCENT";
|
45
38
|
CODE_POINTS[CODE_POINTS["LATIN_SMALL_A"] = 97] = "LATIN_SMALL_A";
|
46
|
-
CODE_POINTS[CODE_POINTS["LATIN_SMALL_F"] = 102] = "LATIN_SMALL_F";
|
47
|
-
CODE_POINTS[CODE_POINTS["LATIN_SMALL_X"] = 120] = "LATIN_SMALL_X";
|
48
39
|
CODE_POINTS[CODE_POINTS["LATIN_SMALL_Z"] = 122] = "LATIN_SMALL_Z";
|
49
|
-
|
50
|
-
})(CODE_POINTS = CODE_POINTS || (CODE_POINTS = {}));
|
40
|
+
})(CODE_POINTS || (CODE_POINTS = {}));
|
51
41
|
const SEQUENCES = {
|
52
42
|
DASH_DASH: '--',
|
53
43
|
CDATA_START: '[CDATA[',
|
@@ -137,7 +127,7 @@ var ERR;
|
|
137
127
|
ERR["misplacedStartTagForHeadElement"] = "misplaced-start-tag-for-head-element";
|
138
128
|
ERR["nestedNoscriptInHead"] = "nested-noscript-in-head";
|
139
129
|
ERR["eofInElementThatCanContainOnlyText"] = "eof-in-element-that-can-contain-only-text";
|
140
|
-
})(ERR
|
130
|
+
})(ERR || (ERR = {}));
|
141
131
|
|
142
132
|
//Const
|
143
133
|
const DEFAULT_BUFFER_WATERLINE = 1 << 16;
|
@@ -170,22 +160,24 @@ class Preprocessor {
|
|
170
160
|
get offset() {
|
171
161
|
return this.droppedBufferSize + this.pos;
|
172
162
|
}
|
173
|
-
getError(code) {
|
163
|
+
getError(code, cpOffset) {
|
174
164
|
const { line, col, offset } = this;
|
165
|
+
const startCol = col + cpOffset;
|
166
|
+
const startOffset = offset + cpOffset;
|
175
167
|
return {
|
176
168
|
code,
|
177
169
|
startLine: line,
|
178
170
|
endLine: line,
|
179
|
-
startCol
|
180
|
-
endCol:
|
181
|
-
startOffset
|
182
|
-
endOffset:
|
171
|
+
startCol,
|
172
|
+
endCol: startCol,
|
173
|
+
startOffset,
|
174
|
+
endOffset: startOffset,
|
183
175
|
};
|
184
176
|
}
|
185
177
|
_err(code) {
|
186
178
|
if (this.handler.onParseError && this.lastErrOffset !== this.offset) {
|
187
179
|
this.lastErrOffset = this.offset;
|
188
|
-
this.handler.onParseError(this.getError(code));
|
180
|
+
this.handler.onParseError(this.getError(code, 0));
|
189
181
|
}
|
190
182
|
}
|
191
183
|
_addGap() {
|
@@ -343,7 +335,7 @@ var TokenType;
|
|
343
335
|
TokenType[TokenType["DOCTYPE"] = 6] = "DOCTYPE";
|
344
336
|
TokenType[TokenType["EOF"] = 7] = "EOF";
|
345
337
|
TokenType[TokenType["HIBERNATION"] = 8] = "HIBERNATION";
|
346
|
-
})(TokenType
|
338
|
+
})(TokenType || (TokenType = {}));
|
347
339
|
function getTokenAttr(token, attrName) {
|
348
340
|
for (let i = token.attrs.length - 1; i >= 0; i--) {
|
349
341
|
if (token.attrs[i].name === attrName) {
|
@@ -367,6 +359,51 @@ new Uint16Array(
|
|
367
359
|
.split("")
|
368
360
|
.map((c) => c.charCodeAt(0)));
|
369
361
|
|
362
|
+
// Adapted from https://github.com/mathiasbynens/he/blob/36afe179392226cf1b6ccdb16ebbb7a5a844d93a/src/he.js#L106-L134
|
363
|
+
const decodeMap = new Map([
|
364
|
+
[0, 65533],
|
365
|
+
// C1 Unicode control character reference replacements
|
366
|
+
[128, 8364],
|
367
|
+
[130, 8218],
|
368
|
+
[131, 402],
|
369
|
+
[132, 8222],
|
370
|
+
[133, 8230],
|
371
|
+
[134, 8224],
|
372
|
+
[135, 8225],
|
373
|
+
[136, 710],
|
374
|
+
[137, 8240],
|
375
|
+
[138, 352],
|
376
|
+
[139, 8249],
|
377
|
+
[140, 338],
|
378
|
+
[142, 381],
|
379
|
+
[145, 8216],
|
380
|
+
[146, 8217],
|
381
|
+
[147, 8220],
|
382
|
+
[148, 8221],
|
383
|
+
[149, 8226],
|
384
|
+
[150, 8211],
|
385
|
+
[151, 8212],
|
386
|
+
[152, 732],
|
387
|
+
[153, 8482],
|
388
|
+
[154, 353],
|
389
|
+
[155, 8250],
|
390
|
+
[156, 339],
|
391
|
+
[158, 382],
|
392
|
+
[159, 376],
|
393
|
+
]);
|
394
|
+
/**
|
395
|
+
* Replace the given code point with a replacement character if it is a
|
396
|
+
* surrogate or is outside the valid range. Otherwise return the code
|
397
|
+
* point unchanged.
|
398
|
+
*/
|
399
|
+
function replaceCodePoint(codePoint) {
|
400
|
+
var _a;
|
401
|
+
if ((codePoint >= 0xd800 && codePoint <= 0xdfff) || codePoint > 0x10ffff) {
|
402
|
+
return 0xfffd;
|
403
|
+
}
|
404
|
+
return (_a = decodeMap.get(codePoint)) !== null && _a !== void 0 ? _a : codePoint;
|
405
|
+
}
|
406
|
+
|
370
407
|
var CharCodes;
|
371
408
|
(function (CharCodes) {
|
372
409
|
CharCodes[CharCodes["NUM"] = 35] = "NUM";
|
@@ -382,12 +419,35 @@ var CharCodes;
|
|
382
419
|
CharCodes[CharCodes["UPPER_F"] = 70] = "UPPER_F";
|
383
420
|
CharCodes[CharCodes["UPPER_Z"] = 90] = "UPPER_Z";
|
384
421
|
})(CharCodes || (CharCodes = {}));
|
422
|
+
/** Bit that needs to be set to convert an upper case ASCII character to lower case */
|
423
|
+
const TO_LOWER_BIT = 0b100000;
|
385
424
|
var BinTrieFlags;
|
386
425
|
(function (BinTrieFlags) {
|
387
426
|
BinTrieFlags[BinTrieFlags["VALUE_LENGTH"] = 49152] = "VALUE_LENGTH";
|
388
427
|
BinTrieFlags[BinTrieFlags["BRANCH_LENGTH"] = 16256] = "BRANCH_LENGTH";
|
389
428
|
BinTrieFlags[BinTrieFlags["JUMP_TABLE"] = 127] = "JUMP_TABLE";
|
390
429
|
})(BinTrieFlags || (BinTrieFlags = {}));
|
430
|
+
function isNumber(code) {
|
431
|
+
return code >= CharCodes.ZERO && code <= CharCodes.NINE;
|
432
|
+
}
|
433
|
+
function isHexadecimalCharacter(code) {
|
434
|
+
return ((code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_F) ||
|
435
|
+
(code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_F));
|
436
|
+
}
|
437
|
+
function isAsciiAlphaNumeric$1(code) {
|
438
|
+
return ((code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_Z) ||
|
439
|
+
(code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_Z) ||
|
440
|
+
isNumber(code));
|
441
|
+
}
|
442
|
+
/**
|
443
|
+
* Checks if the given character is a valid end character for an entity in an attribute.
|
444
|
+
*
|
445
|
+
* Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error.
|
446
|
+
* See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
|
447
|
+
*/
|
448
|
+
function isEntityInAttributeInvalidEnd(code) {
|
449
|
+
return code === CharCodes.EQUALS || isAsciiAlphaNumeric$1(code);
|
450
|
+
}
|
391
451
|
var EntityDecoderState;
|
392
452
|
(function (EntityDecoderState) {
|
393
453
|
EntityDecoderState[EntityDecoderState["EntityStart"] = 0] = "EntityStart";
|
@@ -405,6 +465,320 @@ var DecodingMode;
|
|
405
465
|
/** Entities in attributes have limitations on ending characters. */
|
406
466
|
DecodingMode[DecodingMode["Attribute"] = 2] = "Attribute";
|
407
467
|
})(DecodingMode || (DecodingMode = {}));
|
468
|
+
/**
|
469
|
+
* Token decoder with support of writing partial entities.
|
470
|
+
*/
|
471
|
+
class EntityDecoder {
|
472
|
+
constructor(
|
473
|
+
/** The tree used to decode entities. */
|
474
|
+
decodeTree,
|
475
|
+
/**
|
476
|
+
* The function that is called when a codepoint is decoded.
|
477
|
+
*
|
478
|
+
* For multi-byte named entities, this will be called multiple times,
|
479
|
+
* with the second codepoint, and the same `consumed` value.
|
480
|
+
*
|
481
|
+
* @param codepoint The decoded codepoint.
|
482
|
+
* @param consumed The number of bytes consumed by the decoder.
|
483
|
+
*/
|
484
|
+
emitCodePoint,
|
485
|
+
/** An object that is used to produce errors. */
|
486
|
+
errors) {
|
487
|
+
this.decodeTree = decodeTree;
|
488
|
+
this.emitCodePoint = emitCodePoint;
|
489
|
+
this.errors = errors;
|
490
|
+
/** The current state of the decoder. */
|
491
|
+
this.state = EntityDecoderState.EntityStart;
|
492
|
+
/** Characters that were consumed while parsing an entity. */
|
493
|
+
this.consumed = 1;
|
494
|
+
/**
|
495
|
+
* The result of the entity.
|
496
|
+
*
|
497
|
+
* Either the result index of a numeric entity, or the codepoint of a
|
498
|
+
* numeric entity.
|
499
|
+
*/
|
500
|
+
this.result = 0;
|
501
|
+
/** The current index in the decode tree. */
|
502
|
+
this.treeIndex = 0;
|
503
|
+
/** The number of characters that were consumed in excess. */
|
504
|
+
this.excess = 1;
|
505
|
+
/** The mode in which the decoder is operating. */
|
506
|
+
this.decodeMode = DecodingMode.Strict;
|
507
|
+
}
|
508
|
+
/** Resets the instance to make it reusable. */
|
509
|
+
startEntity(decodeMode) {
|
510
|
+
this.decodeMode = decodeMode;
|
511
|
+
this.state = EntityDecoderState.EntityStart;
|
512
|
+
this.result = 0;
|
513
|
+
this.treeIndex = 0;
|
514
|
+
this.excess = 1;
|
515
|
+
this.consumed = 1;
|
516
|
+
}
|
517
|
+
/**
|
518
|
+
* Write an entity to the decoder. This can be called multiple times with partial entities.
|
519
|
+
* If the entity is incomplete, the decoder will return -1.
|
520
|
+
*
|
521
|
+
* Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
|
522
|
+
* entity is incomplete, and resume when the next string is written.
|
523
|
+
*
|
524
|
+
* @param string The string containing the entity (or a continuation of the entity).
|
525
|
+
* @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
|
526
|
+
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
527
|
+
*/
|
528
|
+
write(str, offset) {
|
529
|
+
switch (this.state) {
|
530
|
+
case EntityDecoderState.EntityStart: {
|
531
|
+
if (str.charCodeAt(offset) === CharCodes.NUM) {
|
532
|
+
this.state = EntityDecoderState.NumericStart;
|
533
|
+
this.consumed += 1;
|
534
|
+
return this.stateNumericStart(str, offset + 1);
|
535
|
+
}
|
536
|
+
this.state = EntityDecoderState.NamedEntity;
|
537
|
+
return this.stateNamedEntity(str, offset);
|
538
|
+
}
|
539
|
+
case EntityDecoderState.NumericStart: {
|
540
|
+
return this.stateNumericStart(str, offset);
|
541
|
+
}
|
542
|
+
case EntityDecoderState.NumericDecimal: {
|
543
|
+
return this.stateNumericDecimal(str, offset);
|
544
|
+
}
|
545
|
+
case EntityDecoderState.NumericHex: {
|
546
|
+
return this.stateNumericHex(str, offset);
|
547
|
+
}
|
548
|
+
case EntityDecoderState.NamedEntity: {
|
549
|
+
return this.stateNamedEntity(str, offset);
|
550
|
+
}
|
551
|
+
}
|
552
|
+
}
|
553
|
+
/**
|
554
|
+
* Switches between the numeric decimal and hexadecimal states.
|
555
|
+
*
|
556
|
+
* Equivalent to the `Numeric character reference state` in the HTML spec.
|
557
|
+
*
|
558
|
+
* @param str The string containing the entity (or a continuation of the entity).
|
559
|
+
* @param offset The current offset.
|
560
|
+
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
561
|
+
*/
|
562
|
+
stateNumericStart(str, offset) {
|
563
|
+
if (offset >= str.length) {
|
564
|
+
return -1;
|
565
|
+
}
|
566
|
+
if ((str.charCodeAt(offset) | TO_LOWER_BIT) === CharCodes.LOWER_X) {
|
567
|
+
this.state = EntityDecoderState.NumericHex;
|
568
|
+
this.consumed += 1;
|
569
|
+
return this.stateNumericHex(str, offset + 1);
|
570
|
+
}
|
571
|
+
this.state = EntityDecoderState.NumericDecimal;
|
572
|
+
return this.stateNumericDecimal(str, offset);
|
573
|
+
}
|
574
|
+
addToNumericResult(str, start, end, base) {
|
575
|
+
if (start !== end) {
|
576
|
+
const digitCount = end - start;
|
577
|
+
this.result =
|
578
|
+
this.result * Math.pow(base, digitCount) +
|
579
|
+
parseInt(str.substr(start, digitCount), base);
|
580
|
+
this.consumed += digitCount;
|
581
|
+
}
|
582
|
+
}
|
583
|
+
/**
|
584
|
+
* Parses a hexadecimal numeric entity.
|
585
|
+
*
|
586
|
+
* Equivalent to the `Hexademical character reference state` in the HTML spec.
|
587
|
+
*
|
588
|
+
* @param str The string containing the entity (or a continuation of the entity).
|
589
|
+
* @param offset The current offset.
|
590
|
+
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
591
|
+
*/
|
592
|
+
stateNumericHex(str, offset) {
|
593
|
+
const startIdx = offset;
|
594
|
+
while (offset < str.length) {
|
595
|
+
const char = str.charCodeAt(offset);
|
596
|
+
if (isNumber(char) || isHexadecimalCharacter(char)) {
|
597
|
+
offset += 1;
|
598
|
+
}
|
599
|
+
else {
|
600
|
+
this.addToNumericResult(str, startIdx, offset, 16);
|
601
|
+
return this.emitNumericEntity(char, 3);
|
602
|
+
}
|
603
|
+
}
|
604
|
+
this.addToNumericResult(str, startIdx, offset, 16);
|
605
|
+
return -1;
|
606
|
+
}
|
607
|
+
/**
|
608
|
+
* Parses a decimal numeric entity.
|
609
|
+
*
|
610
|
+
* Equivalent to the `Decimal character reference state` in the HTML spec.
|
611
|
+
*
|
612
|
+
* @param str The string containing the entity (or a continuation of the entity).
|
613
|
+
* @param offset The current offset.
|
614
|
+
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
615
|
+
*/
|
616
|
+
stateNumericDecimal(str, offset) {
|
617
|
+
const startIdx = offset;
|
618
|
+
while (offset < str.length) {
|
619
|
+
const char = str.charCodeAt(offset);
|
620
|
+
if (isNumber(char)) {
|
621
|
+
offset += 1;
|
622
|
+
}
|
623
|
+
else {
|
624
|
+
this.addToNumericResult(str, startIdx, offset, 10);
|
625
|
+
return this.emitNumericEntity(char, 2);
|
626
|
+
}
|
627
|
+
}
|
628
|
+
this.addToNumericResult(str, startIdx, offset, 10);
|
629
|
+
return -1;
|
630
|
+
}
|
631
|
+
/**
|
632
|
+
* Validate and emit a numeric entity.
|
633
|
+
*
|
634
|
+
* Implements the logic from the `Hexademical character reference start
|
635
|
+
* state` and `Numeric character reference end state` in the HTML spec.
|
636
|
+
*
|
637
|
+
* @param lastCp The last code point of the entity. Used to see if the
|
638
|
+
* entity was terminated with a semicolon.
|
639
|
+
* @param expectedLength The minimum number of characters that should be
|
640
|
+
* consumed. Used to validate that at least one digit
|
641
|
+
* was consumed.
|
642
|
+
* @returns The number of characters that were consumed.
|
643
|
+
*/
|
644
|
+
emitNumericEntity(lastCp, expectedLength) {
|
645
|
+
var _a;
|
646
|
+
// Ensure we consumed at least one digit.
|
647
|
+
if (this.consumed <= expectedLength) {
|
648
|
+
(_a = this.errors) === null || _a === void 0 ? void 0 : _a.absenceOfDigitsInNumericCharacterReference(this.consumed);
|
649
|
+
return 0;
|
650
|
+
}
|
651
|
+
// Figure out if this is a legit end of the entity
|
652
|
+
if (lastCp === CharCodes.SEMI) {
|
653
|
+
this.consumed += 1;
|
654
|
+
}
|
655
|
+
else if (this.decodeMode === DecodingMode.Strict) {
|
656
|
+
return 0;
|
657
|
+
}
|
658
|
+
this.emitCodePoint(replaceCodePoint(this.result), this.consumed);
|
659
|
+
if (this.errors) {
|
660
|
+
if (lastCp !== CharCodes.SEMI) {
|
661
|
+
this.errors.missingSemicolonAfterCharacterReference();
|
662
|
+
}
|
663
|
+
this.errors.validateNumericCharacterReference(this.result);
|
664
|
+
}
|
665
|
+
return this.consumed;
|
666
|
+
}
|
667
|
+
/**
|
668
|
+
* Parses a named entity.
|
669
|
+
*
|
670
|
+
* Equivalent to the `Named character reference state` in the HTML spec.
|
671
|
+
*
|
672
|
+
* @param str The string containing the entity (or a continuation of the entity).
|
673
|
+
* @param offset The current offset.
|
674
|
+
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
675
|
+
*/
|
676
|
+
stateNamedEntity(str, offset) {
|
677
|
+
const { decodeTree } = this;
|
678
|
+
let current = decodeTree[this.treeIndex];
|
679
|
+
// The mask is the number of bytes of the value, including the current byte.
|
680
|
+
let valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
|
681
|
+
for (; offset < str.length; offset++, this.excess++) {
|
682
|
+
const char = str.charCodeAt(offset);
|
683
|
+
this.treeIndex = determineBranch(decodeTree, current, this.treeIndex + Math.max(1, valueLength), char);
|
684
|
+
if (this.treeIndex < 0) {
|
685
|
+
return this.result === 0 ||
|
686
|
+
// If we are parsing an attribute
|
687
|
+
(this.decodeMode === DecodingMode.Attribute &&
|
688
|
+
// We shouldn't have consumed any characters after the entity,
|
689
|
+
(valueLength === 0 ||
|
690
|
+
// And there should be no invalid characters.
|
691
|
+
isEntityInAttributeInvalidEnd(char)))
|
692
|
+
? 0
|
693
|
+
: this.emitNotTerminatedNamedEntity();
|
694
|
+
}
|
695
|
+
current = decodeTree[this.treeIndex];
|
696
|
+
valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
|
697
|
+
// If the branch is a value, store it and continue
|
698
|
+
if (valueLength !== 0) {
|
699
|
+
// If the entity is terminated by a semicolon, we are done.
|
700
|
+
if (char === CharCodes.SEMI) {
|
701
|
+
return this.emitNamedEntityData(this.treeIndex, valueLength, this.consumed + this.excess);
|
702
|
+
}
|
703
|
+
// If we encounter a non-terminated (legacy) entity while parsing strictly, then ignore it.
|
704
|
+
if (this.decodeMode !== DecodingMode.Strict) {
|
705
|
+
this.result = this.treeIndex;
|
706
|
+
this.consumed += this.excess;
|
707
|
+
this.excess = 0;
|
708
|
+
}
|
709
|
+
}
|
710
|
+
}
|
711
|
+
return -1;
|
712
|
+
}
|
713
|
+
/**
|
714
|
+
* Emit a named entity that was not terminated with a semicolon.
|
715
|
+
*
|
716
|
+
* @returns The number of characters consumed.
|
717
|
+
*/
|
718
|
+
emitNotTerminatedNamedEntity() {
|
719
|
+
var _a;
|
720
|
+
const { result, decodeTree } = this;
|
721
|
+
const valueLength = (decodeTree[result] & BinTrieFlags.VALUE_LENGTH) >> 14;
|
722
|
+
this.emitNamedEntityData(result, valueLength, this.consumed);
|
723
|
+
(_a = this.errors) === null || _a === void 0 ? void 0 : _a.missingSemicolonAfterCharacterReference();
|
724
|
+
return this.consumed;
|
725
|
+
}
|
726
|
+
/**
|
727
|
+
* Emit a named entity.
|
728
|
+
*
|
729
|
+
* @param result The index of the entity in the decode tree.
|
730
|
+
* @param valueLength The number of bytes in the entity.
|
731
|
+
* @param consumed The number of characters consumed.
|
732
|
+
*
|
733
|
+
* @returns The number of characters consumed.
|
734
|
+
*/
|
735
|
+
emitNamedEntityData(result, valueLength, consumed) {
|
736
|
+
const { decodeTree } = this;
|
737
|
+
this.emitCodePoint(valueLength === 1
|
738
|
+
? decodeTree[result] & ~BinTrieFlags.VALUE_LENGTH
|
739
|
+
: decodeTree[result + 1], consumed);
|
740
|
+
if (valueLength === 3) {
|
741
|
+
// For multi-byte values, we need to emit the second byte.
|
742
|
+
this.emitCodePoint(decodeTree[result + 2], consumed);
|
743
|
+
}
|
744
|
+
return consumed;
|
745
|
+
}
|
746
|
+
/**
|
747
|
+
* Signal to the parser that the end of the input was reached.
|
748
|
+
*
|
749
|
+
* Remaining data will be emitted and relevant errors will be produced.
|
750
|
+
*
|
751
|
+
* @returns The number of characters consumed.
|
752
|
+
*/
|
753
|
+
end() {
|
754
|
+
var _a;
|
755
|
+
switch (this.state) {
|
756
|
+
case EntityDecoderState.NamedEntity: {
|
757
|
+
// Emit a named entity if we have one.
|
758
|
+
return this.result !== 0 &&
|
759
|
+
(this.decodeMode !== DecodingMode.Attribute ||
|
760
|
+
this.result === this.treeIndex)
|
761
|
+
? this.emitNotTerminatedNamedEntity()
|
762
|
+
: 0;
|
763
|
+
}
|
764
|
+
// Otherwise, emit a numeric entity if we have one.
|
765
|
+
case EntityDecoderState.NumericDecimal: {
|
766
|
+
return this.emitNumericEntity(0, 2);
|
767
|
+
}
|
768
|
+
case EntityDecoderState.NumericHex: {
|
769
|
+
return this.emitNumericEntity(0, 3);
|
770
|
+
}
|
771
|
+
case EntityDecoderState.NumericStart: {
|
772
|
+
(_a = this.errors) === null || _a === void 0 ? void 0 : _a.absenceOfDigitsInNumericCharacterReference(this.consumed);
|
773
|
+
return 0;
|
774
|
+
}
|
775
|
+
case EntityDecoderState.EntityStart: {
|
776
|
+
// Return 0 if we have no entity.
|
777
|
+
return 0;
|
778
|
+
}
|
779
|
+
}
|
780
|
+
}
|
781
|
+
}
|
408
782
|
/**
|
409
783
|
* Determines the branch of the current node that is taken given the current
|
410
784
|
* character. This function is used to traverse the trie.
|
@@ -458,7 +832,7 @@ var NS;
|
|
458
832
|
NS["XLINK"] = "http://www.w3.org/1999/xlink";
|
459
833
|
NS["XML"] = "http://www.w3.org/XML/1998/namespace";
|
460
834
|
NS["XMLNS"] = "http://www.w3.org/2000/xmlns/";
|
461
|
-
})(NS
|
835
|
+
})(NS || (NS = {}));
|
462
836
|
var ATTRS;
|
463
837
|
(function (ATTRS) {
|
464
838
|
ATTRS["TYPE"] = "type";
|
@@ -469,7 +843,7 @@ var ATTRS;
|
|
469
843
|
ATTRS["COLOR"] = "color";
|
470
844
|
ATTRS["FACE"] = "face";
|
471
845
|
ATTRS["SIZE"] = "size";
|
472
|
-
})(ATTRS
|
846
|
+
})(ATTRS || (ATTRS = {}));
|
473
847
|
/**
|
474
848
|
* The mode of the document.
|
475
849
|
*
|
@@ -480,7 +854,7 @@ var DOCUMENT_MODE;
|
|
480
854
|
DOCUMENT_MODE["NO_QUIRKS"] = "no-quirks";
|
481
855
|
DOCUMENT_MODE["QUIRKS"] = "quirks";
|
482
856
|
DOCUMENT_MODE["LIMITED_QUIRKS"] = "limited-quirks";
|
483
|
-
})(DOCUMENT_MODE
|
857
|
+
})(DOCUMENT_MODE || (DOCUMENT_MODE = {}));
|
484
858
|
var TAG_NAMES;
|
485
859
|
(function (TAG_NAMES) {
|
486
860
|
TAG_NAMES["A"] = "a";
|
@@ -576,6 +950,7 @@ var TAG_NAMES;
|
|
576
950
|
TAG_NAMES["RUBY"] = "ruby";
|
577
951
|
TAG_NAMES["S"] = "s";
|
578
952
|
TAG_NAMES["SCRIPT"] = "script";
|
953
|
+
TAG_NAMES["SEARCH"] = "search";
|
579
954
|
TAG_NAMES["SECTION"] = "section";
|
580
955
|
TAG_NAMES["SELECT"] = "select";
|
581
956
|
TAG_NAMES["SOURCE"] = "source";
|
@@ -605,7 +980,7 @@ var TAG_NAMES;
|
|
605
980
|
TAG_NAMES["VAR"] = "var";
|
606
981
|
TAG_NAMES["WBR"] = "wbr";
|
607
982
|
TAG_NAMES["XMP"] = "xmp";
|
608
|
-
})(TAG_NAMES
|
983
|
+
})(TAG_NAMES || (TAG_NAMES = {}));
|
609
984
|
/**
|
610
985
|
* Tag IDs are numeric IDs for known tag names.
|
611
986
|
*
|
@@ -707,36 +1082,37 @@ var TAG_ID;
|
|
707
1082
|
TAG_ID[TAG_ID["RUBY"] = 91] = "RUBY";
|
708
1083
|
TAG_ID[TAG_ID["S"] = 92] = "S";
|
709
1084
|
TAG_ID[TAG_ID["SCRIPT"] = 93] = "SCRIPT";
|
710
|
-
TAG_ID[TAG_ID["
|
711
|
-
TAG_ID[TAG_ID["
|
712
|
-
TAG_ID[TAG_ID["
|
713
|
-
TAG_ID[TAG_ID["
|
714
|
-
TAG_ID[TAG_ID["
|
715
|
-
TAG_ID[TAG_ID["
|
716
|
-
TAG_ID[TAG_ID["
|
717
|
-
TAG_ID[TAG_ID["
|
718
|
-
TAG_ID[TAG_ID["
|
719
|
-
TAG_ID[TAG_ID["
|
720
|
-
TAG_ID[TAG_ID["
|
721
|
-
TAG_ID[TAG_ID["
|
722
|
-
TAG_ID[TAG_ID["
|
723
|
-
TAG_ID[TAG_ID["
|
724
|
-
TAG_ID[TAG_ID["
|
725
|
-
TAG_ID[TAG_ID["
|
726
|
-
TAG_ID[TAG_ID["
|
727
|
-
TAG_ID[TAG_ID["
|
728
|
-
TAG_ID[TAG_ID["
|
729
|
-
TAG_ID[TAG_ID["
|
730
|
-
TAG_ID[TAG_ID["
|
731
|
-
TAG_ID[TAG_ID["
|
732
|
-
TAG_ID[TAG_ID["
|
733
|
-
TAG_ID[TAG_ID["
|
734
|
-
TAG_ID[TAG_ID["
|
735
|
-
TAG_ID[TAG_ID["
|
736
|
-
TAG_ID[TAG_ID["
|
737
|
-
TAG_ID[TAG_ID["
|
738
|
-
TAG_ID[TAG_ID["
|
739
|
-
|
1085
|
+
TAG_ID[TAG_ID["SEARCH"] = 94] = "SEARCH";
|
1086
|
+
TAG_ID[TAG_ID["SECTION"] = 95] = "SECTION";
|
1087
|
+
TAG_ID[TAG_ID["SELECT"] = 96] = "SELECT";
|
1088
|
+
TAG_ID[TAG_ID["SOURCE"] = 97] = "SOURCE";
|
1089
|
+
TAG_ID[TAG_ID["SMALL"] = 98] = "SMALL";
|
1090
|
+
TAG_ID[TAG_ID["SPAN"] = 99] = "SPAN";
|
1091
|
+
TAG_ID[TAG_ID["STRIKE"] = 100] = "STRIKE";
|
1092
|
+
TAG_ID[TAG_ID["STRONG"] = 101] = "STRONG";
|
1093
|
+
TAG_ID[TAG_ID["STYLE"] = 102] = "STYLE";
|
1094
|
+
TAG_ID[TAG_ID["SUB"] = 103] = "SUB";
|
1095
|
+
TAG_ID[TAG_ID["SUMMARY"] = 104] = "SUMMARY";
|
1096
|
+
TAG_ID[TAG_ID["SUP"] = 105] = "SUP";
|
1097
|
+
TAG_ID[TAG_ID["TABLE"] = 106] = "TABLE";
|
1098
|
+
TAG_ID[TAG_ID["TBODY"] = 107] = "TBODY";
|
1099
|
+
TAG_ID[TAG_ID["TEMPLATE"] = 108] = "TEMPLATE";
|
1100
|
+
TAG_ID[TAG_ID["TEXTAREA"] = 109] = "TEXTAREA";
|
1101
|
+
TAG_ID[TAG_ID["TFOOT"] = 110] = "TFOOT";
|
1102
|
+
TAG_ID[TAG_ID["TD"] = 111] = "TD";
|
1103
|
+
TAG_ID[TAG_ID["TH"] = 112] = "TH";
|
1104
|
+
TAG_ID[TAG_ID["THEAD"] = 113] = "THEAD";
|
1105
|
+
TAG_ID[TAG_ID["TITLE"] = 114] = "TITLE";
|
1106
|
+
TAG_ID[TAG_ID["TR"] = 115] = "TR";
|
1107
|
+
TAG_ID[TAG_ID["TRACK"] = 116] = "TRACK";
|
1108
|
+
TAG_ID[TAG_ID["TT"] = 117] = "TT";
|
1109
|
+
TAG_ID[TAG_ID["U"] = 118] = "U";
|
1110
|
+
TAG_ID[TAG_ID["UL"] = 119] = "UL";
|
1111
|
+
TAG_ID[TAG_ID["SVG"] = 120] = "SVG";
|
1112
|
+
TAG_ID[TAG_ID["VAR"] = 121] = "VAR";
|
1113
|
+
TAG_ID[TAG_ID["WBR"] = 122] = "WBR";
|
1114
|
+
TAG_ID[TAG_ID["XMP"] = 123] = "XMP";
|
1115
|
+
})(TAG_ID || (TAG_ID = {}));
|
740
1116
|
const TAG_NAME_TO_ID = new Map([
|
741
1117
|
[TAG_NAMES.A, TAG_ID.A],
|
742
1118
|
[TAG_NAMES.ADDRESS, TAG_ID.ADDRESS],
|
@@ -831,6 +1207,7 @@ const TAG_NAME_TO_ID = new Map([
|
|
831
1207
|
[TAG_NAMES.RUBY, TAG_ID.RUBY],
|
832
1208
|
[TAG_NAMES.S, TAG_ID.S],
|
833
1209
|
[TAG_NAMES.SCRIPT, TAG_ID.SCRIPT],
|
1210
|
+
[TAG_NAMES.SEARCH, TAG_ID.SEARCH],
|
834
1211
|
[TAG_NAMES.SECTION, TAG_ID.SECTION],
|
835
1212
|
[TAG_NAMES.SELECT, TAG_ID.SELECT],
|
836
1213
|
[TAG_NAMES.SOURCE, TAG_ID.SOURCE],
|
@@ -956,40 +1333,8 @@ const SPECIAL_ELEMENTS = {
|
|
956
1333
|
[NS.XML]: new Set(),
|
957
1334
|
[NS.XMLNS]: new Set(),
|
958
1335
|
};
|
959
|
-
|
960
|
-
return tn === $.H1 || tn === $.H2 || tn === $.H3 || tn === $.H4 || tn === $.H5 || tn === $.H6;
|
961
|
-
}
|
1336
|
+
const NUMBERED_HEADERS = new Set([$.H1, $.H2, $.H3, $.H4, $.H5, $.H6]);
|
962
1337
|
|
963
|
-
//C1 Unicode control character reference replacements
|
964
|
-
const C1_CONTROLS_REFERENCE_REPLACEMENTS = new Map([
|
965
|
-
[0x80, 8364],
|
966
|
-
[0x82, 8218],
|
967
|
-
[0x83, 402],
|
968
|
-
[0x84, 8222],
|
969
|
-
[0x85, 8230],
|
970
|
-
[0x86, 8224],
|
971
|
-
[0x87, 8225],
|
972
|
-
[0x88, 710],
|
973
|
-
[0x89, 8240],
|
974
|
-
[0x8a, 352],
|
975
|
-
[0x8b, 8249],
|
976
|
-
[0x8c, 338],
|
977
|
-
[0x8e, 381],
|
978
|
-
[0x91, 8216],
|
979
|
-
[0x92, 8217],
|
980
|
-
[0x93, 8220],
|
981
|
-
[0x94, 8221],
|
982
|
-
[0x95, 8226],
|
983
|
-
[0x96, 8211],
|
984
|
-
[0x97, 8212],
|
985
|
-
[0x98, 732],
|
986
|
-
[0x99, 8482],
|
987
|
-
[0x9a, 353],
|
988
|
-
[0x9b, 8250],
|
989
|
-
[0x9c, 339],
|
990
|
-
[0x9e, 382],
|
991
|
-
[0x9f, 376],
|
992
|
-
]);
|
993
1338
|
//States
|
994
1339
|
var State;
|
995
1340
|
(function (State) {
|
@@ -1065,13 +1410,7 @@ var State;
|
|
1065
1410
|
State[State["CDATA_SECTION_BRACKET"] = 69] = "CDATA_SECTION_BRACKET";
|
1066
1411
|
State[State["CDATA_SECTION_END"] = 70] = "CDATA_SECTION_END";
|
1067
1412
|
State[State["CHARACTER_REFERENCE"] = 71] = "CHARACTER_REFERENCE";
|
1068
|
-
State[State["
|
1069
|
-
State[State["AMBIGUOUS_AMPERSAND"] = 73] = "AMBIGUOUS_AMPERSAND";
|
1070
|
-
State[State["NUMERIC_CHARACTER_REFERENCE"] = 74] = "NUMERIC_CHARACTER_REFERENCE";
|
1071
|
-
State[State["HEXADEMICAL_CHARACTER_REFERENCE_START"] = 75] = "HEXADEMICAL_CHARACTER_REFERENCE_START";
|
1072
|
-
State[State["HEXADEMICAL_CHARACTER_REFERENCE"] = 76] = "HEXADEMICAL_CHARACTER_REFERENCE";
|
1073
|
-
State[State["DECIMAL_CHARACTER_REFERENCE"] = 77] = "DECIMAL_CHARACTER_REFERENCE";
|
1074
|
-
State[State["NUMERIC_CHARACTER_REFERENCE_END"] = 78] = "NUMERIC_CHARACTER_REFERENCE_END";
|
1413
|
+
State[State["AMBIGUOUS_AMPERSAND"] = 72] = "AMBIGUOUS_AMPERSAND";
|
1075
1414
|
})(State || (State = {}));
|
1076
1415
|
//Tokenizer initial states for different modes
|
1077
1416
|
const TokenizerMode = {
|
@@ -1101,27 +1440,33 @@ function isAsciiLetter(cp) {
|
|
1101
1440
|
function isAsciiAlphaNumeric(cp) {
|
1102
1441
|
return isAsciiLetter(cp) || isAsciiDigit(cp);
|
1103
1442
|
}
|
1104
|
-
function isAsciiUpperHexDigit(cp) {
|
1105
|
-
return cp >= CODE_POINTS.LATIN_CAPITAL_A && cp <= CODE_POINTS.LATIN_CAPITAL_F;
|
1106
|
-
}
|
1107
|
-
function isAsciiLowerHexDigit(cp) {
|
1108
|
-
return cp >= CODE_POINTS.LATIN_SMALL_A && cp <= CODE_POINTS.LATIN_SMALL_F;
|
1109
|
-
}
|
1110
|
-
function isAsciiHexDigit(cp) {
|
1111
|
-
return isAsciiDigit(cp) || isAsciiUpperHexDigit(cp) || isAsciiLowerHexDigit(cp);
|
1112
|
-
}
|
1113
1443
|
function toAsciiLower(cp) {
|
1114
1444
|
return cp + 32;
|
1115
1445
|
}
|
1116
1446
|
function isWhitespace(cp) {
|
1117
1447
|
return cp === CODE_POINTS.SPACE || cp === CODE_POINTS.LINE_FEED || cp === CODE_POINTS.TABULATION || cp === CODE_POINTS.FORM_FEED;
|
1118
1448
|
}
|
1119
|
-
function isEntityInAttributeInvalidEnd(nextCp) {
|
1120
|
-
return nextCp === CODE_POINTS.EQUALS_SIGN || isAsciiAlphaNumeric(nextCp);
|
1121
|
-
}
|
1122
1449
|
function isScriptDataDoubleEscapeSequenceEnd(cp) {
|
1123
1450
|
return isWhitespace(cp) || cp === CODE_POINTS.SOLIDUS || cp === CODE_POINTS.GREATER_THAN_SIGN;
|
1124
1451
|
}
|
1452
|
+
function getErrorForNumericCharacterReference(code) {
|
1453
|
+
if (code === CODE_POINTS.NULL) {
|
1454
|
+
return ERR.nullCharacterReference;
|
1455
|
+
}
|
1456
|
+
else if (code > 1114111) {
|
1457
|
+
return ERR.characterReferenceOutsideUnicodeRange;
|
1458
|
+
}
|
1459
|
+
else if (isSurrogate(code)) {
|
1460
|
+
return ERR.surrogateCharacterReference;
|
1461
|
+
}
|
1462
|
+
else if (isUndefinedCodePoint(code)) {
|
1463
|
+
return ERR.noncharacterCharacterReference;
|
1464
|
+
}
|
1465
|
+
else if (isControlCodePoint(code) || code === CODE_POINTS.CARRIAGE_RETURN) {
|
1466
|
+
return ERR.controlCharacterReference;
|
1467
|
+
}
|
1468
|
+
return null;
|
1469
|
+
}
|
1125
1470
|
//Tokenizer
|
1126
1471
|
class Tokenizer {
|
1127
1472
|
constructor(options, handler) {
|
@@ -1141,18 +1486,38 @@ class Tokenizer {
|
|
1141
1486
|
this.active = false;
|
1142
1487
|
this.state = State.DATA;
|
1143
1488
|
this.returnState = State.DATA;
|
1144
|
-
this.
|
1489
|
+
this.entityStartPos = 0;
|
1145
1490
|
this.consumedAfterSnapshot = -1;
|
1146
1491
|
this.currentCharacterToken = null;
|
1147
1492
|
this.currentToken = null;
|
1148
1493
|
this.currentAttr = { name: '', value: '' };
|
1149
1494
|
this.preprocessor = new Preprocessor(handler);
|
1150
1495
|
this.currentLocation = this.getCurrentLocation(-1);
|
1496
|
+
this.entityDecoder = new EntityDecoder(htmlDecodeTree, (cp, consumed) => {
|
1497
|
+
// Note: Set `pos` _before_ flushing, as flushing might drop
|
1498
|
+
// the current chunk and invalidate `entityStartPos`.
|
1499
|
+
this.preprocessor.pos = this.entityStartPos + consumed - 1;
|
1500
|
+
this._flushCodePointConsumedAsCharacterReference(cp);
|
1501
|
+
}, handler.onParseError
|
1502
|
+
? {
|
1503
|
+
missingSemicolonAfterCharacterReference: () => {
|
1504
|
+
this._err(ERR.missingSemicolonAfterCharacterReference, 1);
|
1505
|
+
},
|
1506
|
+
absenceOfDigitsInNumericCharacterReference: (consumed) => {
|
1507
|
+
this._err(ERR.absenceOfDigitsInNumericCharacterReference, this.entityStartPos - this.preprocessor.pos + consumed);
|
1508
|
+
},
|
1509
|
+
validateNumericCharacterReference: (code) => {
|
1510
|
+
const error = getErrorForNumericCharacterReference(code);
|
1511
|
+
if (error)
|
1512
|
+
this._err(error, 1);
|
1513
|
+
},
|
1514
|
+
}
|
1515
|
+
: undefined);
|
1151
1516
|
}
|
1152
1517
|
//Errors
|
1153
|
-
_err(code) {
|
1518
|
+
_err(code, cpOffset = 0) {
|
1154
1519
|
var _a, _b;
|
1155
|
-
(_b = (_a = this.handler).onParseError) === null || _b === void 0 ? void 0 : _b.call(_a, this.preprocessor.getError(code));
|
1520
|
+
(_b = (_a = this.handler).onParseError) === null || _b === void 0 ? void 0 : _b.call(_a, this.preprocessor.getError(code, cpOffset));
|
1156
1521
|
}
|
1157
1522
|
// NOTE: `offset` may never run across line boundaries.
|
1158
1523
|
getCurrentLocation(offset) {
|
@@ -1214,7 +1579,8 @@ class Tokenizer {
|
|
1214
1579
|
//Hibernation
|
1215
1580
|
_ensureHibernation() {
|
1216
1581
|
if (this.preprocessor.endOfChunkHit) {
|
1217
|
-
this.
|
1582
|
+
this.preprocessor.retreat(this.consumedAfterSnapshot);
|
1583
|
+
this.consumedAfterSnapshot = 0;
|
1218
1584
|
this.active = false;
|
1219
1585
|
return true;
|
1220
1586
|
}
|
@@ -1225,14 +1591,6 @@ class Tokenizer {
|
|
1225
1591
|
this.consumedAfterSnapshot++;
|
1226
1592
|
return this.preprocessor.advance();
|
1227
1593
|
}
|
1228
|
-
_unconsume(count) {
|
1229
|
-
this.consumedAfterSnapshot -= count;
|
1230
|
-
this.preprocessor.retreat(count);
|
1231
|
-
}
|
1232
|
-
_reconsumeInState(state, cp) {
|
1233
|
-
this.state = state;
|
1234
|
-
this._callState(cp);
|
1235
|
-
}
|
1236
1594
|
_advanceBy(count) {
|
1237
1595
|
this.consumedAfterSnapshot += count;
|
1238
1596
|
for (let i = 0; i < count; i++) {
|
@@ -1404,7 +1762,7 @@ class Tokenizer {
|
|
1404
1762
|
this.active = false;
|
1405
1763
|
}
|
1406
1764
|
//Characters emission
|
1407
|
-
//OPTIMIZATION: specification uses only one type of character
|
1765
|
+
//OPTIMIZATION: The specification uses only one type of character token (one token per character).
|
1408
1766
|
//This causes a huge memory overhead and a lot of unnecessary parser loops. parse5 uses 3 groups of characters.
|
1409
1767
|
//If we have a sequence of characters that belong to the same group, the parser can process it
|
1410
1768
|
//as a single solid character token.
|
@@ -1414,15 +1772,15 @@ class Tokenizer {
|
|
1414
1772
|
//3)TokenType.CHARACTER - any character sequence which don't belong to groups 1 and 2 (e.g. 'abcdef1234@@#$%^')
|
1415
1773
|
_appendCharToCurrentCharacterToken(type, ch) {
|
1416
1774
|
if (this.currentCharacterToken) {
|
1417
|
-
if (this.currentCharacterToken.type
|
1775
|
+
if (this.currentCharacterToken.type === type) {
|
1776
|
+
this.currentCharacterToken.chars += ch;
|
1777
|
+
return;
|
1778
|
+
}
|
1779
|
+
else {
|
1418
1780
|
this.currentLocation = this.getCurrentLocation(0);
|
1419
1781
|
this._emitCurrentCharacterToken(this.currentLocation);
|
1420
1782
|
this.preprocessor.dropParsedChunk();
|
1421
1783
|
}
|
1422
|
-
else {
|
1423
|
-
this.currentCharacterToken.chars += ch;
|
1424
|
-
return;
|
1425
|
-
}
|
1426
1784
|
}
|
1427
1785
|
this._createCharacterToken(type, ch);
|
1428
1786
|
}
|
@@ -1440,59 +1798,11 @@ class Tokenizer {
|
|
1440
1798
|
this._appendCharToCurrentCharacterToken(TokenType.CHARACTER, ch);
|
1441
1799
|
}
|
1442
1800
|
// Character reference helpers
|
1443
|
-
|
1444
|
-
|
1445
|
-
|
1446
|
-
|
1447
|
-
|
1448
|
-
i = determineBranch(htmlDecodeTree, current, i + 1, cp);
|
1449
|
-
if (i < 0)
|
1450
|
-
break;
|
1451
|
-
excess += 1;
|
1452
|
-
current = htmlDecodeTree[i];
|
1453
|
-
const masked = current & BinTrieFlags.VALUE_LENGTH;
|
1454
|
-
// If the branch is a value, store it and continue
|
1455
|
-
if (masked) {
|
1456
|
-
// The mask is the number of bytes of the value, including the current byte.
|
1457
|
-
const valueLength = (masked >> 14) - 1;
|
1458
|
-
// Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error.
|
1459
|
-
// See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
|
1460
|
-
if (cp !== CODE_POINTS.SEMICOLON &&
|
1461
|
-
this._isCharacterReferenceInAttribute() &&
|
1462
|
-
isEntityInAttributeInvalidEnd(this.preprocessor.peek(1))) {
|
1463
|
-
//NOTE: we don't flush all consumed code points here, and instead switch back to the original state after
|
1464
|
-
//emitting an ampersand. This is fine, as alphanumeric characters won't be parsed differently in attributes.
|
1465
|
-
result = [CODE_POINTS.AMPERSAND];
|
1466
|
-
// Skip over the value.
|
1467
|
-
i += valueLength;
|
1468
|
-
}
|
1469
|
-
else {
|
1470
|
-
// If this is a surrogate pair, consume the next two bytes.
|
1471
|
-
result =
|
1472
|
-
valueLength === 0
|
1473
|
-
? [htmlDecodeTree[i] & ~BinTrieFlags.VALUE_LENGTH]
|
1474
|
-
: valueLength === 1
|
1475
|
-
? [htmlDecodeTree[++i]]
|
1476
|
-
: [htmlDecodeTree[++i], htmlDecodeTree[++i]];
|
1477
|
-
excess = 0;
|
1478
|
-
withoutSemicolon = cp !== CODE_POINTS.SEMICOLON;
|
1479
|
-
}
|
1480
|
-
if (valueLength === 0) {
|
1481
|
-
// If the value is zero-length, we're done.
|
1482
|
-
this._consume();
|
1483
|
-
break;
|
1484
|
-
}
|
1485
|
-
}
|
1486
|
-
}
|
1487
|
-
this._unconsume(excess);
|
1488
|
-
if (withoutSemicolon && !this.preprocessor.endOfChunkHit) {
|
1489
|
-
this._err(ERR.missingSemicolonAfterCharacterReference);
|
1490
|
-
}
|
1491
|
-
// We want to emit the error above on the code point after the entity.
|
1492
|
-
// We always consume one code point too many in the loop, and we wait to
|
1493
|
-
// unconsume it until after the error is emitted.
|
1494
|
-
this._unconsume(1);
|
1495
|
-
return result;
|
1801
|
+
_startCharacterReference() {
|
1802
|
+
this.returnState = this.state;
|
1803
|
+
this.state = State.CHARACTER_REFERENCE;
|
1804
|
+
this.entityStartPos = this.preprocessor.pos;
|
1805
|
+
this.entityDecoder.startEntity(this._isCharacterReferenceInAttribute() ? DecodingMode.Attribute : DecodingMode.Legacy);
|
1496
1806
|
}
|
1497
1807
|
_isCharacterReferenceInAttribute() {
|
1498
1808
|
return (this.returnState === State.ATTRIBUTE_VALUE_DOUBLE_QUOTED ||
|
@@ -1795,37 +2105,13 @@ class Tokenizer {
|
|
1795
2105
|
break;
|
1796
2106
|
}
|
1797
2107
|
case State.CHARACTER_REFERENCE: {
|
1798
|
-
this._stateCharacterReference(
|
1799
|
-
break;
|
1800
|
-
}
|
1801
|
-
case State.NAMED_CHARACTER_REFERENCE: {
|
1802
|
-
this._stateNamedCharacterReference(cp);
|
2108
|
+
this._stateCharacterReference();
|
1803
2109
|
break;
|
1804
2110
|
}
|
1805
2111
|
case State.AMBIGUOUS_AMPERSAND: {
|
1806
2112
|
this._stateAmbiguousAmpersand(cp);
|
1807
2113
|
break;
|
1808
2114
|
}
|
1809
|
-
case State.NUMERIC_CHARACTER_REFERENCE: {
|
1810
|
-
this._stateNumericCharacterReference(cp);
|
1811
|
-
break;
|
1812
|
-
}
|
1813
|
-
case State.HEXADEMICAL_CHARACTER_REFERENCE_START: {
|
1814
|
-
this._stateHexademicalCharacterReferenceStart(cp);
|
1815
|
-
break;
|
1816
|
-
}
|
1817
|
-
case State.HEXADEMICAL_CHARACTER_REFERENCE: {
|
1818
|
-
this._stateHexademicalCharacterReference(cp);
|
1819
|
-
break;
|
1820
|
-
}
|
1821
|
-
case State.DECIMAL_CHARACTER_REFERENCE: {
|
1822
|
-
this._stateDecimalCharacterReference(cp);
|
1823
|
-
break;
|
1824
|
-
}
|
1825
|
-
case State.NUMERIC_CHARACTER_REFERENCE_END: {
|
1826
|
-
this._stateNumericCharacterReferenceEnd(cp);
|
1827
|
-
break;
|
1828
|
-
}
|
1829
2115
|
default: {
|
1830
2116
|
throw new Error('Unknown state');
|
1831
2117
|
}
|
@@ -1841,8 +2127,7 @@ class Tokenizer {
|
|
1841
2127
|
break;
|
1842
2128
|
}
|
1843
2129
|
case CODE_POINTS.AMPERSAND: {
|
1844
|
-
this.
|
1845
|
-
this.state = State.CHARACTER_REFERENCE;
|
2130
|
+
this._startCharacterReference();
|
1846
2131
|
break;
|
1847
2132
|
}
|
1848
2133
|
case CODE_POINTS.NULL: {
|
@@ -1864,8 +2149,7 @@ class Tokenizer {
|
|
1864
2149
|
_stateRcdata(cp) {
|
1865
2150
|
switch (cp) {
|
1866
2151
|
case CODE_POINTS.AMPERSAND: {
|
1867
|
-
this.
|
1868
|
-
this.state = State.CHARACTER_REFERENCE;
|
2152
|
+
this._startCharacterReference();
|
1869
2153
|
break;
|
1870
2154
|
}
|
1871
2155
|
case CODE_POINTS.LESS_THAN_SIGN: {
|
@@ -2634,8 +2918,7 @@ class Tokenizer {
|
|
2634
2918
|
break;
|
2635
2919
|
}
|
2636
2920
|
case CODE_POINTS.AMPERSAND: {
|
2637
|
-
this.
|
2638
|
-
this.state = State.CHARACTER_REFERENCE;
|
2921
|
+
this._startCharacterReference();
|
2639
2922
|
break;
|
2640
2923
|
}
|
2641
2924
|
case CODE_POINTS.NULL: {
|
@@ -2662,8 +2945,7 @@ class Tokenizer {
|
|
2662
2945
|
break;
|
2663
2946
|
}
|
2664
2947
|
case CODE_POINTS.AMPERSAND: {
|
2665
|
-
this.
|
2666
|
-
this.state = State.CHARACTER_REFERENCE;
|
2948
|
+
this._startCharacterReference();
|
2667
2949
|
break;
|
2668
2950
|
}
|
2669
2951
|
case CODE_POINTS.NULL: {
|
@@ -2694,8 +2976,7 @@ class Tokenizer {
|
|
2694
2976
|
break;
|
2695
2977
|
}
|
2696
2978
|
case CODE_POINTS.AMPERSAND: {
|
2697
|
-
this.
|
2698
|
-
this.state = State.CHARACTER_REFERENCE;
|
2979
|
+
this._startCharacterReference();
|
2699
2980
|
break;
|
2700
2981
|
}
|
2701
2982
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
@@ -3711,35 +3992,35 @@ class Tokenizer {
|
|
3711
3992
|
}
|
3712
3993
|
// Character reference state
|
3713
3994
|
//------------------------------------------------------------------
|
3714
|
-
_stateCharacterReference(
|
3715
|
-
|
3716
|
-
|
3717
|
-
|
3718
|
-
|
3719
|
-
|
3720
|
-
|
3995
|
+
_stateCharacterReference() {
|
3996
|
+
let length = this.entityDecoder.write(this.preprocessor.html, this.preprocessor.pos);
|
3997
|
+
if (length < 0) {
|
3998
|
+
if (this.preprocessor.lastChunkWritten) {
|
3999
|
+
length = this.entityDecoder.end();
|
4000
|
+
}
|
4001
|
+
else {
|
4002
|
+
// Wait for the rest of the entity.
|
4003
|
+
this.active = false;
|
4004
|
+
// Mark the entire buffer as read.
|
4005
|
+
this.preprocessor.pos = this.preprocessor.html.length - 1;
|
4006
|
+
this.consumedAfterSnapshot = 0;
|
4007
|
+
this.preprocessor.endOfChunkHit = true;
|
4008
|
+
return;
|
4009
|
+
}
|
3721
4010
|
}
|
3722
|
-
|
4011
|
+
if (length === 0) {
|
4012
|
+
// This was not a valid entity. Go back to the beginning, and
|
4013
|
+
// figure out what to do.
|
4014
|
+
this.preprocessor.pos = this.entityStartPos;
|
3723
4015
|
this._flushCodePointConsumedAsCharacterReference(CODE_POINTS.AMPERSAND);
|
3724
|
-
this.
|
3725
|
-
|
3726
|
-
|
3727
|
-
|
3728
|
-
//------------------------------------------------------------------
|
3729
|
-
_stateNamedCharacterReference(cp) {
|
3730
|
-
const matchResult = this._matchNamedCharacterReference(cp);
|
3731
|
-
//NOTE: Matching can be abrupted by hibernation. In that case, match
|
3732
|
-
//results are no longer valid and we will need to start over.
|
3733
|
-
if (this._ensureHibernation()) ;
|
3734
|
-
else if (matchResult) {
|
3735
|
-
for (let i = 0; i < matchResult.length; i++) {
|
3736
|
-
this._flushCodePointConsumedAsCharacterReference(matchResult[i]);
|
3737
|
-
}
|
3738
|
-
this.state = this.returnState;
|
4016
|
+
this.state =
|
4017
|
+
!this._isCharacterReferenceInAttribute() && isAsciiAlphaNumeric(this.preprocessor.peek(1))
|
4018
|
+
? State.AMBIGUOUS_AMPERSAND
|
4019
|
+
: this.returnState;
|
3739
4020
|
}
|
3740
4021
|
else {
|
3741
|
-
|
3742
|
-
this.state =
|
4022
|
+
// We successfully parsed an entity. Switch to the return state.
|
4023
|
+
this.state = this.returnState;
|
3743
4024
|
}
|
3744
4025
|
}
|
3745
4026
|
// Ambiguos ampersand state
|
@@ -3752,107 +4033,10 @@ class Tokenizer {
|
|
3752
4033
|
if (cp === CODE_POINTS.SEMICOLON) {
|
3753
4034
|
this._err(ERR.unknownNamedCharacterReference);
|
3754
4035
|
}
|
3755
|
-
this._reconsumeInState(this.returnState, cp);
|
3756
|
-
}
|
3757
|
-
}
|
3758
|
-
// Numeric character reference state
|
3759
|
-
//------------------------------------------------------------------
|
3760
|
-
_stateNumericCharacterReference(cp) {
|
3761
|
-
this.charRefCode = 0;
|
3762
|
-
if (cp === CODE_POINTS.LATIN_SMALL_X || cp === CODE_POINTS.LATIN_CAPITAL_X) {
|
3763
|
-
this.state = State.HEXADEMICAL_CHARACTER_REFERENCE_START;
|
3764
|
-
}
|
3765
|
-
// Inlined decimal character reference start state
|
3766
|
-
else if (isAsciiDigit(cp)) {
|
3767
|
-
this.state = State.DECIMAL_CHARACTER_REFERENCE;
|
3768
|
-
this._stateDecimalCharacterReference(cp);
|
3769
|
-
}
|
3770
|
-
else {
|
3771
|
-
this._err(ERR.absenceOfDigitsInNumericCharacterReference);
|
3772
|
-
this._flushCodePointConsumedAsCharacterReference(CODE_POINTS.AMPERSAND);
|
3773
|
-
this._flushCodePointConsumedAsCharacterReference(CODE_POINTS.NUMBER_SIGN);
|
3774
|
-
this._reconsumeInState(this.returnState, cp);
|
3775
|
-
}
|
3776
|
-
}
|
3777
|
-
// Hexademical character reference start state
|
3778
|
-
//------------------------------------------------------------------
|
3779
|
-
_stateHexademicalCharacterReferenceStart(cp) {
|
3780
|
-
if (isAsciiHexDigit(cp)) {
|
3781
|
-
this.state = State.HEXADEMICAL_CHARACTER_REFERENCE;
|
3782
|
-
this._stateHexademicalCharacterReference(cp);
|
3783
|
-
}
|
3784
|
-
else {
|
3785
|
-
this._err(ERR.absenceOfDigitsInNumericCharacterReference);
|
3786
|
-
this._flushCodePointConsumedAsCharacterReference(CODE_POINTS.AMPERSAND);
|
3787
|
-
this._flushCodePointConsumedAsCharacterReference(CODE_POINTS.NUMBER_SIGN);
|
3788
|
-
this._unconsume(2);
|
3789
4036
|
this.state = this.returnState;
|
4037
|
+
this._callState(cp);
|
3790
4038
|
}
|
3791
4039
|
}
|
3792
|
-
// Hexademical character reference state
|
3793
|
-
//------------------------------------------------------------------
|
3794
|
-
_stateHexademicalCharacterReference(cp) {
|
3795
|
-
if (isAsciiUpperHexDigit(cp)) {
|
3796
|
-
this.charRefCode = this.charRefCode * 16 + cp - 0x37;
|
3797
|
-
}
|
3798
|
-
else if (isAsciiLowerHexDigit(cp)) {
|
3799
|
-
this.charRefCode = this.charRefCode * 16 + cp - 0x57;
|
3800
|
-
}
|
3801
|
-
else if (isAsciiDigit(cp)) {
|
3802
|
-
this.charRefCode = this.charRefCode * 16 + cp - 0x30;
|
3803
|
-
}
|
3804
|
-
else if (cp === CODE_POINTS.SEMICOLON) {
|
3805
|
-
this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
|
3806
|
-
}
|
3807
|
-
else {
|
3808
|
-
this._err(ERR.missingSemicolonAfterCharacterReference);
|
3809
|
-
this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
|
3810
|
-
this._stateNumericCharacterReferenceEnd(cp);
|
3811
|
-
}
|
3812
|
-
}
|
3813
|
-
// Decimal character reference state
|
3814
|
-
//------------------------------------------------------------------
|
3815
|
-
_stateDecimalCharacterReference(cp) {
|
3816
|
-
if (isAsciiDigit(cp)) {
|
3817
|
-
this.charRefCode = this.charRefCode * 10 + cp - 0x30;
|
3818
|
-
}
|
3819
|
-
else if (cp === CODE_POINTS.SEMICOLON) {
|
3820
|
-
this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
|
3821
|
-
}
|
3822
|
-
else {
|
3823
|
-
this._err(ERR.missingSemicolonAfterCharacterReference);
|
3824
|
-
this.state = State.NUMERIC_CHARACTER_REFERENCE_END;
|
3825
|
-
this._stateNumericCharacterReferenceEnd(cp);
|
3826
|
-
}
|
3827
|
-
}
|
3828
|
-
// Numeric character reference end state
|
3829
|
-
//------------------------------------------------------------------
|
3830
|
-
_stateNumericCharacterReferenceEnd(cp) {
|
3831
|
-
if (this.charRefCode === CODE_POINTS.NULL) {
|
3832
|
-
this._err(ERR.nullCharacterReference);
|
3833
|
-
this.charRefCode = CODE_POINTS.REPLACEMENT_CHARACTER;
|
3834
|
-
}
|
3835
|
-
else if (this.charRefCode > 1114111) {
|
3836
|
-
this._err(ERR.characterReferenceOutsideUnicodeRange);
|
3837
|
-
this.charRefCode = CODE_POINTS.REPLACEMENT_CHARACTER;
|
3838
|
-
}
|
3839
|
-
else if (isSurrogate(this.charRefCode)) {
|
3840
|
-
this._err(ERR.surrogateCharacterReference);
|
3841
|
-
this.charRefCode = CODE_POINTS.REPLACEMENT_CHARACTER;
|
3842
|
-
}
|
3843
|
-
else if (isUndefinedCodePoint(this.charRefCode)) {
|
3844
|
-
this._err(ERR.noncharacterCharacterReference);
|
3845
|
-
}
|
3846
|
-
else if (isControlCodePoint(this.charRefCode) || this.charRefCode === CODE_POINTS.CARRIAGE_RETURN) {
|
3847
|
-
this._err(ERR.controlCharacterReference);
|
3848
|
-
const replacement = C1_CONTROLS_REFERENCE_REPLACEMENTS.get(this.charRefCode);
|
3849
|
-
if (replacement !== undefined) {
|
3850
|
-
this.charRefCode = replacement;
|
3851
|
-
}
|
3852
|
-
}
|
3853
|
-
this._flushCodePointConsumedAsCharacterReference(this.charRefCode);
|
3854
|
-
this._reconsumeInState(this.returnState, cp);
|
3855
|
-
}
|
3856
4040
|
}
|
3857
4041
|
|
3858
4042
|
//Element utils
|
@@ -3868,31 +4052,25 @@ const IMPLICIT_END_TAG_REQUIRED_THOROUGHLY = new Set([
|
|
3868
4052
|
TAG_ID.THEAD,
|
3869
4053
|
TAG_ID.TR,
|
3870
4054
|
]);
|
3871
|
-
const
|
3872
|
-
|
3873
|
-
|
3874
|
-
|
3875
|
-
|
3876
|
-
|
3877
|
-
|
3878
|
-
|
3879
|
-
|
3880
|
-
|
3881
|
-
[TAG_ID.ANNOTATION_XML, NS.MATHML],
|
3882
|
-
[TAG_ID.MI, NS.MATHML],
|
3883
|
-
[TAG_ID.MN, NS.MATHML],
|
3884
|
-
[TAG_ID.MO, NS.MATHML],
|
3885
|
-
[TAG_ID.MS, NS.MATHML],
|
3886
|
-
[TAG_ID.MTEXT, NS.MATHML],
|
3887
|
-
[TAG_ID.DESC, NS.SVG],
|
3888
|
-
[TAG_ID.FOREIGN_OBJECT, NS.SVG],
|
3889
|
-
[TAG_ID.TITLE, NS.SVG],
|
4055
|
+
const SCOPING_ELEMENTS_HTML = new Set([
|
4056
|
+
TAG_ID.APPLET,
|
4057
|
+
TAG_ID.CAPTION,
|
4058
|
+
TAG_ID.HTML,
|
4059
|
+
TAG_ID.MARQUEE,
|
4060
|
+
TAG_ID.OBJECT,
|
4061
|
+
TAG_ID.TABLE,
|
4062
|
+
TAG_ID.TD,
|
4063
|
+
TAG_ID.TEMPLATE,
|
4064
|
+
TAG_ID.TH,
|
3890
4065
|
]);
|
3891
|
-
const
|
3892
|
-
const
|
3893
|
-
const
|
3894
|
-
const
|
3895
|
-
const
|
4066
|
+
const SCOPING_ELEMENTS_HTML_LIST = new Set([...SCOPING_ELEMENTS_HTML, TAG_ID.OL, TAG_ID.UL]);
|
4067
|
+
const SCOPING_ELEMENTS_HTML_BUTTON = new Set([...SCOPING_ELEMENTS_HTML, TAG_ID.BUTTON]);
|
4068
|
+
const SCOPING_ELEMENTS_MATHML = new Set([TAG_ID.ANNOTATION_XML, TAG_ID.MI, TAG_ID.MN, TAG_ID.MO, TAG_ID.MS, TAG_ID.MTEXT]);
|
4069
|
+
const SCOPING_ELEMENTS_SVG = new Set([TAG_ID.DESC, TAG_ID.FOREIGN_OBJECT, TAG_ID.TITLE]);
|
4070
|
+
const TABLE_ROW_CONTEXT = new Set([TAG_ID.TR, TAG_ID.TEMPLATE, TAG_ID.HTML]);
|
4071
|
+
const TABLE_BODY_CONTEXT = new Set([TAG_ID.TBODY, TAG_ID.TFOOT, TAG_ID.THEAD, TAG_ID.TEMPLATE, TAG_ID.HTML]);
|
4072
|
+
const TABLE_CONTEXT = new Set([TAG_ID.TABLE, TAG_ID.TEMPLATE, TAG_ID.HTML]);
|
4073
|
+
const TABLE_CELLS = new Set([TAG_ID.TD, TAG_ID.TH]);
|
3896
4074
|
//Stack of open elements
|
3897
4075
|
class OpenElementStack {
|
3898
4076
|
get currentTmplContentOrNode() {
|
@@ -3985,7 +4163,7 @@ class OpenElementStack {
|
|
3985
4163
|
this.shortenToLength(idx < 0 ? 0 : idx);
|
3986
4164
|
}
|
3987
4165
|
popUntilNumberedHeaderPopped() {
|
3988
|
-
this.popUntilPopped(
|
4166
|
+
this.popUntilPopped(NUMBERED_HEADERS, NS.HTML);
|
3989
4167
|
}
|
3990
4168
|
popUntilTableCellPopped() {
|
3991
4169
|
this.popUntilPopped(TABLE_CELLS, NS.HTML);
|
@@ -3998,7 +4176,7 @@ class OpenElementStack {
|
|
3998
4176
|
}
|
3999
4177
|
_indexOfTagNames(tagNames, namespace) {
|
4000
4178
|
for (let i = this.stackTop; i >= 0; i--) {
|
4001
|
-
if (tagNames.
|
4179
|
+
if (tagNames.has(this.tagIDs[i]) && this.treeAdapter.getNamespaceURI(this.items[i]) === namespace) {
|
4002
4180
|
return i;
|
4003
4181
|
}
|
4004
4182
|
}
|
@@ -4048,102 +4226,117 @@ class OpenElementStack {
|
|
4048
4226
|
return this.stackTop === 0 && this.tagIDs[0] === TAG_ID.HTML;
|
4049
4227
|
}
|
4050
4228
|
//Element in scope
|
4051
|
-
|
4229
|
+
hasInDynamicScope(tagName, htmlScope) {
|
4052
4230
|
for (let i = this.stackTop; i >= 0; i--) {
|
4053
4231
|
const tn = this.tagIDs[i];
|
4054
|
-
|
4055
|
-
|
4056
|
-
|
4057
|
-
|
4058
|
-
|
4059
|
-
|
4232
|
+
switch (this.treeAdapter.getNamespaceURI(this.items[i])) {
|
4233
|
+
case NS.HTML: {
|
4234
|
+
if (tn === tagName)
|
4235
|
+
return true;
|
4236
|
+
if (htmlScope.has(tn))
|
4237
|
+
return false;
|
4238
|
+
break;
|
4239
|
+
}
|
4240
|
+
case NS.SVG: {
|
4241
|
+
if (SCOPING_ELEMENTS_SVG.has(tn))
|
4242
|
+
return false;
|
4243
|
+
break;
|
4244
|
+
}
|
4245
|
+
case NS.MATHML: {
|
4246
|
+
if (SCOPING_ELEMENTS_MATHML.has(tn))
|
4247
|
+
return false;
|
4248
|
+
break;
|
4249
|
+
}
|
4060
4250
|
}
|
4061
4251
|
}
|
4062
4252
|
return true;
|
4063
4253
|
}
|
4064
|
-
|
4065
|
-
|
4066
|
-
const tn = this.tagIDs[i];
|
4067
|
-
const ns = this.treeAdapter.getNamespaceURI(this.items[i]);
|
4068
|
-
if (isNumberedHeader(tn) && ns === NS.HTML) {
|
4069
|
-
return true;
|
4070
|
-
}
|
4071
|
-
if (SCOPING_ELEMENT_NS.get(tn) === ns) {
|
4072
|
-
return false;
|
4073
|
-
}
|
4074
|
-
}
|
4075
|
-
return true;
|
4254
|
+
hasInScope(tagName) {
|
4255
|
+
return this.hasInDynamicScope(tagName, SCOPING_ELEMENTS_HTML);
|
4076
4256
|
}
|
4077
4257
|
hasInListItemScope(tagName) {
|
4078
|
-
|
4079
|
-
const tn = this.tagIDs[i];
|
4080
|
-
const ns = this.treeAdapter.getNamespaceURI(this.items[i]);
|
4081
|
-
if (tn === tagName && ns === NS.HTML) {
|
4082
|
-
return true;
|
4083
|
-
}
|
4084
|
-
if (((tn === TAG_ID.UL || tn === TAG_ID.OL) && ns === NS.HTML) || SCOPING_ELEMENT_NS.get(tn) === ns) {
|
4085
|
-
return false;
|
4086
|
-
}
|
4087
|
-
}
|
4088
|
-
return true;
|
4258
|
+
return this.hasInDynamicScope(tagName, SCOPING_ELEMENTS_HTML_LIST);
|
4089
4259
|
}
|
4090
4260
|
hasInButtonScope(tagName) {
|
4261
|
+
return this.hasInDynamicScope(tagName, SCOPING_ELEMENTS_HTML_BUTTON);
|
4262
|
+
}
|
4263
|
+
hasNumberedHeaderInScope() {
|
4091
4264
|
for (let i = this.stackTop; i >= 0; i--) {
|
4092
4265
|
const tn = this.tagIDs[i];
|
4093
|
-
|
4094
|
-
|
4095
|
-
|
4096
|
-
|
4097
|
-
|
4098
|
-
|
4266
|
+
switch (this.treeAdapter.getNamespaceURI(this.items[i])) {
|
4267
|
+
case NS.HTML: {
|
4268
|
+
if (NUMBERED_HEADERS.has(tn))
|
4269
|
+
return true;
|
4270
|
+
if (SCOPING_ELEMENTS_HTML.has(tn))
|
4271
|
+
return false;
|
4272
|
+
break;
|
4273
|
+
}
|
4274
|
+
case NS.SVG: {
|
4275
|
+
if (SCOPING_ELEMENTS_SVG.has(tn))
|
4276
|
+
return false;
|
4277
|
+
break;
|
4278
|
+
}
|
4279
|
+
case NS.MATHML: {
|
4280
|
+
if (SCOPING_ELEMENTS_MATHML.has(tn))
|
4281
|
+
return false;
|
4282
|
+
break;
|
4283
|
+
}
|
4099
4284
|
}
|
4100
4285
|
}
|
4101
4286
|
return true;
|
4102
4287
|
}
|
4103
4288
|
hasInTableScope(tagName) {
|
4104
4289
|
for (let i = this.stackTop; i >= 0; i--) {
|
4105
|
-
|
4106
|
-
const ns = this.treeAdapter.getNamespaceURI(this.items[i]);
|
4107
|
-
if (ns !== NS.HTML) {
|
4290
|
+
if (this.treeAdapter.getNamespaceURI(this.items[i]) !== NS.HTML) {
|
4108
4291
|
continue;
|
4109
4292
|
}
|
4110
|
-
|
4111
|
-
|
4112
|
-
|
4113
|
-
|
4114
|
-
|
4293
|
+
switch (this.tagIDs[i]) {
|
4294
|
+
case tagName: {
|
4295
|
+
return true;
|
4296
|
+
}
|
4297
|
+
case TAG_ID.TABLE:
|
4298
|
+
case TAG_ID.HTML: {
|
4299
|
+
return false;
|
4300
|
+
}
|
4115
4301
|
}
|
4116
4302
|
}
|
4117
4303
|
return true;
|
4118
4304
|
}
|
4119
4305
|
hasTableBodyContextInTableScope() {
|
4120
4306
|
for (let i = this.stackTop; i >= 0; i--) {
|
4121
|
-
|
4122
|
-
const ns = this.treeAdapter.getNamespaceURI(this.items[i]);
|
4123
|
-
if (ns !== NS.HTML) {
|
4307
|
+
if (this.treeAdapter.getNamespaceURI(this.items[i]) !== NS.HTML) {
|
4124
4308
|
continue;
|
4125
4309
|
}
|
4126
|
-
|
4127
|
-
|
4128
|
-
|
4129
|
-
|
4130
|
-
|
4310
|
+
switch (this.tagIDs[i]) {
|
4311
|
+
case TAG_ID.TBODY:
|
4312
|
+
case TAG_ID.THEAD:
|
4313
|
+
case TAG_ID.TFOOT: {
|
4314
|
+
return true;
|
4315
|
+
}
|
4316
|
+
case TAG_ID.TABLE:
|
4317
|
+
case TAG_ID.HTML: {
|
4318
|
+
return false;
|
4319
|
+
}
|
4131
4320
|
}
|
4132
4321
|
}
|
4133
4322
|
return true;
|
4134
4323
|
}
|
4135
4324
|
hasInSelectScope(tagName) {
|
4136
4325
|
for (let i = this.stackTop; i >= 0; i--) {
|
4137
|
-
|
4138
|
-
const ns = this.treeAdapter.getNamespaceURI(this.items[i]);
|
4139
|
-
if (ns !== NS.HTML) {
|
4326
|
+
if (this.treeAdapter.getNamespaceURI(this.items[i]) !== NS.HTML) {
|
4140
4327
|
continue;
|
4141
4328
|
}
|
4142
|
-
|
4143
|
-
|
4144
|
-
|
4145
|
-
|
4146
|
-
|
4329
|
+
switch (this.tagIDs[i]) {
|
4330
|
+
case tagName: {
|
4331
|
+
return true;
|
4332
|
+
}
|
4333
|
+
case TAG_ID.OPTION:
|
4334
|
+
case TAG_ID.OPTGROUP: {
|
4335
|
+
break;
|
4336
|
+
}
|
4337
|
+
default: {
|
4338
|
+
return false;
|
4339
|
+
}
|
4147
4340
|
}
|
4148
4341
|
}
|
4149
4342
|
return true;
|
@@ -4172,7 +4365,7 @@ var EntryType;
|
|
4172
4365
|
(function (EntryType) {
|
4173
4366
|
EntryType[EntryType["Marker"] = 0] = "Marker";
|
4174
4367
|
EntryType[EntryType["Element"] = 1] = "Element";
|
4175
|
-
})(EntryType
|
4368
|
+
})(EntryType || (EntryType = {}));
|
4176
4369
|
const MARKER = { type: EntryType.Marker };
|
4177
4370
|
//List of formatting elements
|
4178
4371
|
class FormattingElementList {
|
@@ -4277,13 +4470,6 @@ class FormattingElementList {
|
|
4277
4470
|
}
|
4278
4471
|
}
|
4279
4472
|
|
4280
|
-
function createTextNode(value) {
|
4281
|
-
return {
|
4282
|
-
nodeName: '#text',
|
4283
|
-
value,
|
4284
|
-
parentNode: null,
|
4285
|
-
};
|
4286
|
-
}
|
4287
4473
|
const defaultTreeAdapter = {
|
4288
4474
|
//Node construction
|
4289
4475
|
createDocument() {
|
@@ -4316,6 +4502,13 @@ const defaultTreeAdapter = {
|
|
4316
4502
|
parentNode: null,
|
4317
4503
|
};
|
4318
4504
|
},
|
4505
|
+
createTextNode(value) {
|
4506
|
+
return {
|
4507
|
+
nodeName: '#text',
|
4508
|
+
value,
|
4509
|
+
parentNode: null,
|
4510
|
+
};
|
4511
|
+
},
|
4319
4512
|
//Tree mutation
|
4320
4513
|
appendChild(parentNode, newNode) {
|
4321
4514
|
parentNode.childNodes.push(newNode);
|
@@ -4371,7 +4564,7 @@ const defaultTreeAdapter = {
|
|
4371
4564
|
return;
|
4372
4565
|
}
|
4373
4566
|
}
|
4374
|
-
defaultTreeAdapter.appendChild(parentNode, createTextNode(text));
|
4567
|
+
defaultTreeAdapter.appendChild(parentNode, defaultTreeAdapter.createTextNode(text));
|
4375
4568
|
},
|
4376
4569
|
insertTextBefore(parentNode, text, referenceNode) {
|
4377
4570
|
const prevNode = parentNode.childNodes[parentNode.childNodes.indexOf(referenceNode) - 1];
|
@@ -4379,7 +4572,7 @@ const defaultTreeAdapter = {
|
|
4379
4572
|
prevNode.value += text;
|
4380
4573
|
}
|
4381
4574
|
else {
|
4382
|
-
defaultTreeAdapter.insertBefore(parentNode, createTextNode(text), referenceNode);
|
4575
|
+
defaultTreeAdapter.insertBefore(parentNode, defaultTreeAdapter.createTextNode(text), referenceNode);
|
4383
4576
|
}
|
4384
4577
|
},
|
4385
4578
|
adoptAttributes(recipient, attrs) {
|
@@ -4640,7 +4833,6 @@ const XML_ATTRS_ADJUSTMENT_MAP = new Map([
|
|
4640
4833
|
['xlink:show', { prefix: 'xlink', name: 'show', namespace: NS.XLINK }],
|
4641
4834
|
['xlink:title', { prefix: 'xlink', name: 'title', namespace: NS.XLINK }],
|
4642
4835
|
['xlink:type', { prefix: 'xlink', name: 'type', namespace: NS.XLINK }],
|
4643
|
-
['xml:base', { prefix: 'xml', name: 'base', namespace: NS.XML }],
|
4644
4836
|
['xml:lang', { prefix: 'xml', name: 'lang', namespace: NS.XML }],
|
4645
4837
|
['xml:space', { prefix: 'xml', name: 'space', namespace: NS.XML }],
|
4646
4838
|
['xmlns', { prefix: '', name: 'xmlns', namespace: NS.XMLNS }],
|
@@ -4842,26 +5034,41 @@ const defaultParserOptions = {
|
|
4842
5034
|
};
|
4843
5035
|
//Parser
|
4844
5036
|
class Parser {
|
4845
|
-
constructor(options, document,
|
5037
|
+
constructor(options, document,
|
5038
|
+
/** @internal */
|
5039
|
+
fragmentContext = null,
|
5040
|
+
/** @internal */
|
5041
|
+
scriptHandler = null) {
|
4846
5042
|
this.fragmentContext = fragmentContext;
|
4847
5043
|
this.scriptHandler = scriptHandler;
|
4848
5044
|
this.currentToken = null;
|
4849
5045
|
this.stopped = false;
|
5046
|
+
/** @internal */
|
4850
5047
|
this.insertionMode = InsertionMode.INITIAL;
|
5048
|
+
/** @internal */
|
4851
5049
|
this.originalInsertionMode = InsertionMode.INITIAL;
|
5050
|
+
/** @internal */
|
4852
5051
|
this.headElement = null;
|
5052
|
+
/** @internal */
|
4853
5053
|
this.formElement = null;
|
4854
5054
|
/** Indicates that the current node is not an element in the HTML namespace */
|
4855
5055
|
this.currentNotInHTML = false;
|
4856
5056
|
/**
|
4857
5057
|
* The template insertion mode stack is maintained from the left.
|
4858
5058
|
* Ie. the topmost element will always have index 0.
|
5059
|
+
*
|
5060
|
+
* @internal
|
4859
5061
|
*/
|
4860
5062
|
this.tmplInsertionModeStack = [];
|
5063
|
+
/** @internal */
|
4861
5064
|
this.pendingCharacterTokens = [];
|
5065
|
+
/** @internal */
|
4862
5066
|
this.hasNonWhitespacePendingCharacterToken = false;
|
5067
|
+
/** @internal */
|
4863
5068
|
this.framesetOk = true;
|
5069
|
+
/** @internal */
|
4864
5070
|
this.skipNextNewLine = false;
|
5071
|
+
/** @internal */
|
4865
5072
|
this.fosterParentingEnabled = false;
|
4866
5073
|
this.options = {
|
4867
5074
|
...defaultParserOptions,
|
@@ -4915,6 +5122,7 @@ class Parser {
|
|
4915
5122
|
return fragment;
|
4916
5123
|
}
|
4917
5124
|
//Errors
|
5125
|
+
/** @internal */
|
4918
5126
|
_err(token, code, beforeToken) {
|
4919
5127
|
var _a;
|
4920
5128
|
if (!this.onParseError)
|
@@ -4932,12 +5140,14 @@ class Parser {
|
|
4932
5140
|
this.onParseError(err);
|
4933
5141
|
}
|
4934
5142
|
//Stack events
|
5143
|
+
/** @internal */
|
4935
5144
|
onItemPush(node, tid, isTop) {
|
4936
5145
|
var _a, _b;
|
4937
5146
|
(_b = (_a = this.treeAdapter).onItemPush) === null || _b === void 0 ? void 0 : _b.call(_a, node);
|
4938
5147
|
if (isTop && this.openElements.stackTop > 0)
|
4939
5148
|
this._setContextModes(node, tid);
|
4940
5149
|
}
|
5150
|
+
/** @internal */
|
4941
5151
|
onItemPop(node, isTop) {
|
4942
5152
|
var _a, _b;
|
4943
5153
|
if (this.options.sourceCodeLocationInfo) {
|
@@ -4962,6 +5172,7 @@ class Parser {
|
|
4962
5172
|
this.currentNotInHTML = !isHTML;
|
4963
5173
|
this.tokenizer.inForeignNode = !isHTML && !this._isIntegrationPoint(tid, current);
|
4964
5174
|
}
|
5175
|
+
/** @protected */
|
4965
5176
|
_switchToTextParsing(currentToken, nextTokenizerState) {
|
4966
5177
|
this._insertElement(currentToken, NS.HTML);
|
4967
5178
|
this.tokenizer.state = nextTokenizerState;
|
@@ -4974,11 +5185,13 @@ class Parser {
|
|
4974
5185
|
this.tokenizer.state = TokenizerMode.PLAINTEXT;
|
4975
5186
|
}
|
4976
5187
|
//Fragment parsing
|
5188
|
+
/** @protected */
|
4977
5189
|
_getAdjustedCurrentElement() {
|
4978
5190
|
return this.openElements.stackTop === 0 && this.fragmentContext
|
4979
5191
|
? this.fragmentContext
|
4980
5192
|
: this.openElements.current;
|
4981
5193
|
}
|
5194
|
+
/** @protected */
|
4982
5195
|
_findFormInFragmentContext() {
|
4983
5196
|
let node = this.fragmentContext;
|
4984
5197
|
while (node) {
|
@@ -5020,6 +5233,7 @@ class Parser {
|
|
5020
5233
|
}
|
5021
5234
|
}
|
5022
5235
|
//Tree mutation
|
5236
|
+
/** @protected */
|
5023
5237
|
_setDocumentType(token) {
|
5024
5238
|
const name = token.name || '';
|
5025
5239
|
const publicId = token.publicId || '';
|
@@ -5033,6 +5247,7 @@ class Parser {
|
|
5033
5247
|
}
|
5034
5248
|
}
|
5035
5249
|
}
|
5250
|
+
/** @protected */
|
5036
5251
|
_attachElementToTree(element, location) {
|
5037
5252
|
if (this.options.sourceCodeLocationInfo) {
|
5038
5253
|
const loc = location && {
|
@@ -5049,20 +5264,28 @@ class Parser {
|
|
5049
5264
|
this.treeAdapter.appendChild(parent, element);
|
5050
5265
|
}
|
5051
5266
|
}
|
5267
|
+
/**
|
5268
|
+
* For self-closing tags. Add an element to the tree, but skip adding it
|
5269
|
+
* to the stack.
|
5270
|
+
*/
|
5271
|
+
/** @protected */
|
5052
5272
|
_appendElement(token, namespaceURI) {
|
5053
5273
|
const element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs);
|
5054
5274
|
this._attachElementToTree(element, token.location);
|
5055
5275
|
}
|
5276
|
+
/** @protected */
|
5056
5277
|
_insertElement(token, namespaceURI) {
|
5057
5278
|
const element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs);
|
5058
5279
|
this._attachElementToTree(element, token.location);
|
5059
5280
|
this.openElements.push(element, token.tagID);
|
5060
5281
|
}
|
5282
|
+
/** @protected */
|
5061
5283
|
_insertFakeElement(tagName, tagID) {
|
5062
5284
|
const element = this.treeAdapter.createElement(tagName, NS.HTML, []);
|
5063
5285
|
this._attachElementToTree(element, null);
|
5064
5286
|
this.openElements.push(element, tagID);
|
5065
5287
|
}
|
5288
|
+
/** @protected */
|
5066
5289
|
_insertTemplate(token) {
|
5067
5290
|
const tmpl = this.treeAdapter.createElement(token.tagName, NS.HTML, token.attrs);
|
5068
5291
|
const content = this.treeAdapter.createDocumentFragment();
|
@@ -5072,6 +5295,7 @@ class Parser {
|
|
5072
5295
|
if (this.options.sourceCodeLocationInfo)
|
5073
5296
|
this.treeAdapter.setNodeSourceCodeLocation(content, null);
|
5074
5297
|
}
|
5298
|
+
/** @protected */
|
5075
5299
|
_insertFakeRootElement() {
|
5076
5300
|
const element = this.treeAdapter.createElement(TAG_NAMES.HTML, NS.HTML, []);
|
5077
5301
|
if (this.options.sourceCodeLocationInfo)
|
@@ -5079,6 +5303,7 @@ class Parser {
|
|
5079
5303
|
this.treeAdapter.appendChild(this.openElements.current, element);
|
5080
5304
|
this.openElements.push(element, TAG_ID.HTML);
|
5081
5305
|
}
|
5306
|
+
/** @protected */
|
5082
5307
|
_appendCommentNode(token, parent) {
|
5083
5308
|
const commentNode = this.treeAdapter.createCommentNode(token.data);
|
5084
5309
|
this.treeAdapter.appendChild(parent, commentNode);
|
@@ -5086,6 +5311,7 @@ class Parser {
|
|
5086
5311
|
this.treeAdapter.setNodeSourceCodeLocation(commentNode, token.location);
|
5087
5312
|
}
|
5088
5313
|
}
|
5314
|
+
/** @protected */
|
5089
5315
|
_insertCharacters(token) {
|
5090
5316
|
let parent;
|
5091
5317
|
let beforeElement;
|
@@ -5117,12 +5343,14 @@ class Parser {
|
|
5117
5343
|
this.treeAdapter.setNodeSourceCodeLocation(textNode, token.location);
|
5118
5344
|
}
|
5119
5345
|
}
|
5346
|
+
/** @protected */
|
5120
5347
|
_adoptNodes(donor, recipient) {
|
5121
5348
|
for (let child = this.treeAdapter.getFirstChild(donor); child; child = this.treeAdapter.getFirstChild(donor)) {
|
5122
5349
|
this.treeAdapter.detachNode(child);
|
5123
5350
|
this.treeAdapter.appendChild(recipient, child);
|
5124
5351
|
}
|
5125
5352
|
}
|
5353
|
+
/** @protected */
|
5126
5354
|
_setEndLocation(element, closingToken) {
|
5127
5355
|
if (this.treeAdapter.getNodeSourceCodeLocation(element) && closingToken.location) {
|
5128
5356
|
const ctLoc = closingToken.location;
|
@@ -5172,6 +5400,7 @@ class Parser {
|
|
5172
5400
|
((token.tagID === TAG_ID.MGLYPH || token.tagID === TAG_ID.MALIGNMARK) &&
|
5173
5401
|
!this._isIntegrationPoint(currentTagId, current, NS.HTML)));
|
5174
5402
|
}
|
5403
|
+
/** @protected */
|
5175
5404
|
_processToken(token) {
|
5176
5405
|
switch (token.type) {
|
5177
5406
|
case TokenType.CHARACTER: {
|
@@ -5209,12 +5438,14 @@ class Parser {
|
|
5209
5438
|
}
|
5210
5439
|
}
|
5211
5440
|
//Integration points
|
5441
|
+
/** @protected */
|
5212
5442
|
_isIntegrationPoint(tid, element, foreignNS) {
|
5213
5443
|
const ns = this.treeAdapter.getNamespaceURI(element);
|
5214
5444
|
const attrs = this.treeAdapter.getAttrList(element);
|
5215
5445
|
return isIntegrationPoint(tid, ns, attrs, foreignNS);
|
5216
5446
|
}
|
5217
5447
|
//Active formatting elements reconstruction
|
5448
|
+
/** @protected */
|
5218
5449
|
_reconstructActiveFormattingElements() {
|
5219
5450
|
const listLength = this.activeFormattingElements.entries.length;
|
5220
5451
|
if (listLength) {
|
@@ -5228,17 +5459,20 @@ class Parser {
|
|
5228
5459
|
}
|
5229
5460
|
}
|
5230
5461
|
//Close elements
|
5462
|
+
/** @protected */
|
5231
5463
|
_closeTableCell() {
|
5232
5464
|
this.openElements.generateImpliedEndTags();
|
5233
5465
|
this.openElements.popUntilTableCellPopped();
|
5234
5466
|
this.activeFormattingElements.clearToLastMarker();
|
5235
5467
|
this.insertionMode = InsertionMode.IN_ROW;
|
5236
5468
|
}
|
5469
|
+
/** @protected */
|
5237
5470
|
_closePElement() {
|
5238
5471
|
this.openElements.generateImpliedEndTagsWithExclusion(TAG_ID.P);
|
5239
5472
|
this.openElements.popUntilTagNamePopped(TAG_ID.P);
|
5240
5473
|
}
|
5241
5474
|
//Insertion modes
|
5475
|
+
/** @protected */
|
5242
5476
|
_resetInsertionMode() {
|
5243
5477
|
for (let i = this.openElements.stackTop; i >= 0; i--) {
|
5244
5478
|
//Insertion mode reset map
|
@@ -5304,6 +5538,7 @@ class Parser {
|
|
5304
5538
|
}
|
5305
5539
|
this.insertionMode = InsertionMode.IN_BODY;
|
5306
5540
|
}
|
5541
|
+
/** @protected */
|
5307
5542
|
_resetInsertionModeForSelect(selectIdx) {
|
5308
5543
|
if (selectIdx > 0) {
|
5309
5544
|
for (let i = selectIdx - 1; i > 0; i--) {
|
@@ -5320,12 +5555,15 @@ class Parser {
|
|
5320
5555
|
this.insertionMode = InsertionMode.IN_SELECT;
|
5321
5556
|
}
|
5322
5557
|
//Foster parenting
|
5558
|
+
/** @protected */
|
5323
5559
|
_isElementCausesFosterParenting(tn) {
|
5324
5560
|
return TABLE_STRUCTURE_TAGS.has(tn);
|
5325
5561
|
}
|
5562
|
+
/** @protected */
|
5326
5563
|
_shouldFosterParentOnInsertion() {
|
5327
5564
|
return this.fosterParentingEnabled && this._isElementCausesFosterParenting(this.openElements.currentTagId);
|
5328
5565
|
}
|
5566
|
+
/** @protected */
|
5329
5567
|
_findFosterParentingLocation() {
|
5330
5568
|
for (let i = this.openElements.stackTop; i >= 0; i--) {
|
5331
5569
|
const openElement = this.openElements.items[i];
|
@@ -5348,6 +5586,7 @@ class Parser {
|
|
5348
5586
|
}
|
5349
5587
|
return { parent: this.openElements.items[0], beforeElement: null };
|
5350
5588
|
}
|
5589
|
+
/** @protected */
|
5351
5590
|
_fosterParentElement(element) {
|
5352
5591
|
const location = this._findFosterParentingLocation();
|
5353
5592
|
if (location.beforeElement) {
|
@@ -5358,10 +5597,12 @@ class Parser {
|
|
5358
5597
|
}
|
5359
5598
|
}
|
5360
5599
|
//Special elements
|
5600
|
+
/** @protected */
|
5361
5601
|
_isSpecialElement(element, id) {
|
5362
5602
|
const ns = this.treeAdapter.getNamespaceURI(element);
|
5363
5603
|
return SPECIAL_ELEMENTS[ns].has(id);
|
5364
5604
|
}
|
5605
|
+
/** @internal */
|
5365
5606
|
onCharacter(token) {
|
5366
5607
|
this.skipNextNewLine = false;
|
5367
5608
|
if (this.tokenizer.inForeignNode) {
|
@@ -5431,6 +5672,7 @@ class Parser {
|
|
5431
5672
|
// Do nothing
|
5432
5673
|
}
|
5433
5674
|
}
|
5675
|
+
/** @internal */
|
5434
5676
|
onNullCharacter(token) {
|
5435
5677
|
this.skipNextNewLine = false;
|
5436
5678
|
if (this.tokenizer.inForeignNode) {
|
@@ -5487,6 +5729,7 @@ class Parser {
|
|
5487
5729
|
// Do nothing
|
5488
5730
|
}
|
5489
5731
|
}
|
5732
|
+
/** @internal */
|
5490
5733
|
onComment(token) {
|
5491
5734
|
this.skipNextNewLine = false;
|
5492
5735
|
if (this.currentNotInHTML) {
|
@@ -5531,6 +5774,7 @@ class Parser {
|
|
5531
5774
|
// Do nothing
|
5532
5775
|
}
|
5533
5776
|
}
|
5777
|
+
/** @internal */
|
5534
5778
|
onDoctype(token) {
|
5535
5779
|
this.skipNextNewLine = false;
|
5536
5780
|
switch (this.insertionMode) {
|
@@ -5552,6 +5796,7 @@ class Parser {
|
|
5552
5796
|
// Do nothing
|
5553
5797
|
}
|
5554
5798
|
}
|
5799
|
+
/** @internal */
|
5555
5800
|
onStartTag(token) {
|
5556
5801
|
this.skipNextNewLine = false;
|
5557
5802
|
this.currentToken = token;
|
@@ -5569,6 +5814,7 @@ class Parser {
|
|
5569
5814
|
* for nested calls.
|
5570
5815
|
*
|
5571
5816
|
* @param token The token to process.
|
5817
|
+
* @protected
|
5572
5818
|
*/
|
5573
5819
|
_processStartTag(token) {
|
5574
5820
|
if (this.shouldProcessStartTagTokenInForeignContent(token)) {
|
@@ -5578,6 +5824,7 @@ class Parser {
|
|
5578
5824
|
this._startTagOutsideForeignContent(token);
|
5579
5825
|
}
|
5580
5826
|
}
|
5827
|
+
/** @protected */
|
5581
5828
|
_startTagOutsideForeignContent(token) {
|
5582
5829
|
switch (this.insertionMode) {
|
5583
5830
|
case InsertionMode.INITIAL: {
|
@@ -5671,6 +5918,7 @@ class Parser {
|
|
5671
5918
|
// Do nothing
|
5672
5919
|
}
|
5673
5920
|
}
|
5921
|
+
/** @internal */
|
5674
5922
|
onEndTag(token) {
|
5675
5923
|
this.skipNextNewLine = false;
|
5676
5924
|
this.currentToken = token;
|
@@ -5681,6 +5929,7 @@ class Parser {
|
|
5681
5929
|
this._endTagOutsideForeignContent(token);
|
5682
5930
|
}
|
5683
5931
|
}
|
5932
|
+
/** @protected */
|
5684
5933
|
_endTagOutsideForeignContent(token) {
|
5685
5934
|
switch (this.insertionMode) {
|
5686
5935
|
case InsertionMode.INITIAL: {
|
@@ -5774,6 +6023,7 @@ class Parser {
|
|
5774
6023
|
// Do nothing
|
5775
6024
|
}
|
5776
6025
|
}
|
6026
|
+
/** @internal */
|
5777
6027
|
onEof(token) {
|
5778
6028
|
switch (this.insertionMode) {
|
5779
6029
|
case InsertionMode.INITIAL: {
|
@@ -5835,6 +6085,7 @@ class Parser {
|
|
5835
6085
|
// Do nothing
|
5836
6086
|
}
|
5837
6087
|
}
|
6088
|
+
/** @internal */
|
5838
6089
|
onWhitespaceCharacter(token) {
|
5839
6090
|
if (this.skipNextNewLine) {
|
5840
6091
|
this.skipNextNewLine = false;
|
@@ -6405,7 +6656,7 @@ function numberedHeaderStartTagInBody(p, token) {
|
|
6405
6656
|
if (p.openElements.hasInButtonScope(TAG_ID.P)) {
|
6406
6657
|
p._closePElement();
|
6407
6658
|
}
|
6408
|
-
if (isNumberedHeader(p.openElements.currentTagId)) {
|
6659
|
+
if (NUMBERED_HEADERS.has(p.openElements.currentTagId)) {
|
6409
6660
|
p.openElements.pop();
|
6410
6661
|
}
|
6411
6662
|
p._insertElement(token, NS.HTML);
|
@@ -6567,9 +6818,9 @@ function iframeStartTagInBody(p, token) {
|
|
6567
6818
|
p.framesetOk = false;
|
6568
6819
|
p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
|
6569
6820
|
}
|
6570
|
-
//NOTE: here we assume that we always act as an user agent with enabled plugins, so we parse
|
6571
|
-
//<noembed> as rawtext.
|
6572
|
-
function noembedStartTagInBody(p, token) {
|
6821
|
+
//NOTE: here we assume that we always act as a user agent with enabled plugins/frames, so we parse
|
6822
|
+
//<noembed>/<noframes> as rawtext.
|
6823
|
+
function rawTextStartTagInBody(p, token) {
|
6573
6824
|
p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
|
6574
6825
|
}
|
6575
6826
|
function selectStartTagInBody(p, token) {
|
@@ -6681,6 +6932,7 @@ function startTagInBody(p, token) {
|
|
6681
6932
|
case TAG_ID.DETAILS:
|
6682
6933
|
case TAG_ID.ADDRESS:
|
6683
6934
|
case TAG_ID.ARTICLE:
|
6935
|
+
case TAG_ID.SEARCH:
|
6684
6936
|
case TAG_ID.SECTION:
|
6685
6937
|
case TAG_ID.SUMMARY:
|
6686
6938
|
case TAG_ID.FIELDSET:
|
@@ -6804,8 +7056,9 @@ function startTagInBody(p, token) {
|
|
6804
7056
|
optgroupStartTagInBody(p, token);
|
6805
7057
|
break;
|
6806
7058
|
}
|
6807
|
-
case TAG_ID.NOEMBED: {
|
6808
|
-
noembedStartTagInBody(p, token);
|
7059
|
+
case TAG_ID.NOEMBED:
|
7060
|
+
case TAG_ID.NOFRAMES: {
|
7061
|
+
rawTextStartTagInBody(p, token);
|
6809
7062
|
break;
|
6810
7063
|
}
|
6811
7064
|
case TAG_ID.FRAMESET: {
|
@@ -6818,7 +7071,7 @@ function startTagInBody(p, token) {
|
|
6818
7071
|
}
|
6819
7072
|
case TAG_ID.NOSCRIPT: {
|
6820
7073
|
if (p.options.scriptingEnabled) {
|
6821
|
-
noembedStartTagInBody(p, token);
|
7074
|
+
rawTextStartTagInBody(p, token);
|
6822
7075
|
}
|
6823
7076
|
else {
|
6824
7077
|
genericStartTagInBody(p, token);
|
@@ -6990,6 +7243,7 @@ function endTagInBody(p, token) {
|
|
6990
7243
|
case TAG_ID.ADDRESS:
|
6991
7244
|
case TAG_ID.ARTICLE:
|
6992
7245
|
case TAG_ID.DETAILS:
|
7246
|
+
case TAG_ID.SEARCH:
|
6993
7247
|
case TAG_ID.SECTION:
|
6994
7248
|
case TAG_ID.SUMMARY:
|
6995
7249
|
case TAG_ID.LISTING:
|
@@ -7590,6 +7844,17 @@ function startTagInSelect(p, token) {
|
|
7590
7844
|
p._insertElement(token, NS.HTML);
|
7591
7845
|
break;
|
7592
7846
|
}
|
7847
|
+
case TAG_ID.HR: {
|
7848
|
+
if (p.openElements.currentTagId === TAG_ID.OPTION) {
|
7849
|
+
p.openElements.pop();
|
7850
|
+
}
|
7851
|
+
if (p.openElements.currentTagId === TAG_ID.OPTGROUP) {
|
7852
|
+
p.openElements.pop();
|
7853
|
+
}
|
7854
|
+
p._appendElement(token, NS.HTML);
|
7855
|
+
token.ackSelfClosing = true;
|
7856
|
+
break;
|
7857
|
+
}
|
7593
7858
|
case TAG_ID.INPUT:
|
7594
7859
|
case TAG_ID.KEYGEN:
|
7595
7860
|
case TAG_ID.TEXTAREA:
|