webpack 5.107.0 → 5.107.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/lib/BannerPlugin.js +3 -4
  2. package/lib/Chunk.js +21 -25
  3. package/lib/ChunkGroup.js +57 -15
  4. package/lib/Compilation.js +33 -11
  5. package/lib/EvalSourceMapDevToolPlugin.js +0 -1
  6. package/lib/ExportsInfo.js +30 -34
  7. package/lib/ExternalModule.js +15 -11
  8. package/lib/ExternalModuleFactoryPlugin.js +2 -1
  9. package/lib/Module.js +1 -1
  10. package/lib/ModuleNotFoundError.js +10 -0
  11. package/lib/ModuleSourceTypeConstants.js +24 -22
  12. package/lib/NormalModule.js +106 -46
  13. package/lib/NormalModuleFactory.js +38 -26
  14. package/lib/RuntimePlugin.js +1 -1
  15. package/lib/SourceMapDevToolPlugin.js +250 -49
  16. package/lib/Template.js +1 -1
  17. package/lib/TemplatedPathPlugin.js +22 -4
  18. package/lib/asset/AssetBytesGenerator.js +6 -6
  19. package/lib/asset/AssetGenerator.js +14 -14
  20. package/lib/asset/AssetModulesPlugin.js +3 -7
  21. package/lib/asset/AssetSourceGenerator.js +6 -6
  22. package/lib/css/CssModulesPlugin.js +2 -2
  23. package/lib/dependencies/CommonJsImportsParserPlugin.js +108 -1
  24. package/lib/dependencies/CssUrlDependency.js +3 -2
  25. package/lib/dependencies/HarmonyDetectionParserPlugin.js +21 -1
  26. package/lib/dependencies/HtmlScriptSrcDependency.js +264 -25
  27. package/lib/dependencies/HtmlSourceDependency.js +3 -2
  28. package/lib/html/HtmlModulesPlugin.js +1 -5
  29. package/lib/html/walkHtmlTokens.js +641 -125
  30. package/lib/index.js +2 -0
  31. package/lib/javascript/JavascriptModulesPlugin.js +2 -2
  32. package/lib/optimize/SideEffectsFlagPlugin.js +1 -2
  33. package/lib/optimize/SplitChunksPlugin.js +4 -4
  34. package/lib/runtime/AutoPublicPathRuntimeModule.js +3 -3
  35. package/lib/runtime/GetChunkFilenameRuntimeModule.js +5 -5
  36. package/lib/sharing/ConsumeSharedPlugin.js +2 -8
  37. package/lib/sharing/ProvideSharedPlugin.js +4 -4
  38. package/lib/wasm-async/AsyncWebAssemblyModulesPlugin.js +1 -2
  39. package/package.json +3 -3
  40. package/schemas/WebpackOptions.check.js +1 -1
  41. package/schemas/WebpackOptions.json +11 -9
  42. package/schemas/plugins/container/ContainerReferencePlugin.check.js +1 -1
  43. package/schemas/plugins/container/ContainerReferencePlugin.json +1 -0
  44. package/schemas/plugins/container/ExternalsType.check.js +1 -1
  45. package/schemas/plugins/container/ModuleFederationPlugin.check.js +1 -1
  46. package/schemas/plugins/container/ModuleFederationPlugin.json +1 -0
  47. package/types.d.ts +355 -144
@@ -5,7 +5,25 @@
5
5
 
6
6
  "use strict";
7
7
 
8
- // cspell:ignore apos
8
+ // cspell:ignore apos notpre noncharacters DFFF
9
+
10
+ // #region html entities
11
+ // The contents of this region are auto-generated by
12
+ // `tooling/generate-html-entities.js` from `tooling/html-entities.json`.
13
+ // Do not edit by hand — re-run the generator (via `yarn fix:special`) to refresh.
14
+ //
15
+ // WHATWG named character references. Keys are entity names WITHOUT the
16
+ // leading `&` (some end with `;`, others omit it for legacy entities that
17
+ // match without a closing semicolon). Values are the decoded character
18
+ // strings (1–2 UTF-16 code units).
19
+ // Built on a null prototype so bracket lookups (`HTML_ENTITIES[name]`)
20
+ // can't be poisoned by inherited `Object.prototype` keys like `toString`,
21
+ // `constructor`, or `__proto__` — without this, `&toString;` would falsely
22
+ // look like a matched named character reference.
23
+ // prettier-ignore
24
+ // cspell:disable-next-line
25
+ const HTML_ENTITIES = /** @type {Readonly<Record<string, string>>} */ (Object.freeze(Object.assign(Object.create(null), {"AElig":"Æ","AElig;":"Æ","AMP":"&","AMP;":"&","Aacute":"Á","Aacute;":"Á","Abreve;":"Ă","Acirc":"Â","Acirc;":"Â","Acy;":"А","Afr;":"𝔄","Agrave":"À","Agrave;":"À","Alpha;":"Α","Amacr;":"Ā","And;":"⩓","Aogon;":"Ą","Aopf;":"𝔸","ApplyFunction;":"⁡","Aring":"Å","Aring;":"Å","Ascr;":"𝒜","Assign;":"≔","Atilde":"Ã","Atilde;":"Ã","Auml":"Ä","Auml;":"Ä","Backslash;":"∖","Barv;":"⫧","Barwed;":"⌆","Bcy;":"Б","Because;":"∵","Bernoullis;":"ℬ","Beta;":"Β","Bfr;":"𝔅","Bopf;":"𝔹","Breve;":"˘","Bscr;":"ℬ","Bumpeq;":"≎","CHcy;":"Ч","COPY":"©","COPY;":"©","Cacute;":"Ć","Cap;":"⋒","CapitalDifferentialD;":"ⅅ","Cayleys;":"ℭ","Ccaron;":"Č","Ccedil":"Ç","Ccedil;":"Ç","Ccirc;":"Ĉ","Cconint;":"∰","Cdot;":"Ċ","Cedilla;":"¸","CenterDot;":"·","Cfr;":"ℭ","Chi;":"Χ","CircleDot;":"⊙","CircleMinus;":"⊖","CirclePlus;":"⊕","CircleTimes;":"⊗","ClockwiseContourIntegral;":"∲","CloseCurlyDoubleQuote;":"”","CloseCurlyQuote;":"’","Colon;":"∷","Colone;":"⩴","Congruent;":"≡","Conint;":"∯","ContourIntegral;":"∮","Copf;":"ℂ","Coproduct;":"∐","CounterClockwiseContourIntegral;":"∳","Cross;":"⨯","Cscr;":"𝒞","Cup;":"⋓","CupCap;":"≍","DD;":"ⅅ","DDotrahd;":"⤑","DJcy;":"Ђ","DScy;":"Ѕ","DZcy;":"Џ","Dagger;":"‡","Darr;":"↡","Dashv;":"⫤","Dcaron;":"Ď","Dcy;":"Д","Del;":"∇","Delta;":"Δ","Dfr;":"𝔇","DiacriticalAcute;":"´","DiacriticalDot;":"˙","DiacriticalDoubleAcute;":"˝","DiacriticalGrave;":"`","DiacriticalTilde;":"˜","Diamond;":"⋄","DifferentialD;":"ⅆ","Dopf;":"𝔻","Dot;":"¨","DotDot;":"⃜","DotEqual;":"≐","DoubleContourIntegral;":"∯","DoubleDot;":"¨","DoubleDownArrow;":"⇓","DoubleLeftArrow;":"⇐","DoubleLeftRightArrow;":"⇔","DoubleLeftTee;":"⫤","DoubleLongLeftArrow;":"⟸","DoubleLongLeftRightArrow;":"⟺","DoubleLongRightArrow;":"⟹","DoubleRightArrow;":"⇒","DoubleRightTee;":"⊨","DoubleUpArrow;":"⇑","DoubleUpDownArrow;":"⇕","DoubleVerticalBar;":"∥","DownArrow;":"↓","DownArrowBar;":"⤓","DownArrowUpArrow;":"
⇵","DownBreve;":"̑","DownLeftRightVector;":"⥐","DownLeftTeeVector;":"⥞","DownLeftVector;":"↽","DownLeftVectorBar;":"⥖","DownRightTeeVector;":"⥟","DownRightVector;":"⇁","DownRightVectorBar;":"⥗","DownTee;":"⊤","DownTeeArrow;":"↧","Downarrow;":"⇓","Dscr;":"𝒟","Dstrok;":"Đ","ENG;":"Ŋ","ETH":"Ð","ETH;":"Ð","Eacute":"É","Eacute;":"É","Ecaron;":"Ě","Ecirc":"Ê","Ecirc;":"Ê","Ecy;":"Э","Edot;":"Ė","Efr;":"𝔈","Egrave":"È","Egrave;":"È","Element;":"∈","Emacr;":"Ē","EmptySmallSquare;":"◻","EmptyVerySmallSquare;":"▫","Eogon;":"Ę","Eopf;":"𝔼","Epsilon;":"Ε","Equal;":"⩵","EqualTilde;":"≂","Equilibrium;":"⇌","Escr;":"ℰ","Esim;":"⩳","Eta;":"Η","Euml":"Ë","Euml;":"Ë","Exists;":"∃","ExponentialE;":"ⅇ","Fcy;":"Ф","Ffr;":"𝔉","FilledSmallSquare;":"◼","FilledVerySmallSquare;":"▪","Fopf;":"𝔽","ForAll;":"∀","Fouriertrf;":"ℱ","Fscr;":"ℱ","GJcy;":"Ѓ","GT":">","GT;":">","Gamma;":"Γ","Gammad;":"Ϝ","Gbreve;":"Ğ","Gcedil;":"Ģ","Gcirc;":"Ĝ","Gcy;":"Г","Gdot;":"Ġ","Gfr;":"𝔊","Gg;":"⋙","Gopf;":"𝔾","GreaterEqual;":"≥","GreaterEqualLess;":"⋛","GreaterFullEqual;":"≧","GreaterGreater;":"⪢","GreaterLess;":"≷","GreaterSlantEqual;":"⩾","GreaterTilde;":"≳","Gscr;":"𝒢","Gt;":"≫","HARDcy;":"Ъ","Hacek;":"ˇ","Hat;":"^","Hcirc;":"Ĥ","Hfr;":"ℌ","HilbertSpace;":"ℋ","Hopf;":"ℍ","HorizontalLine;":"─","Hscr;":"ℋ","Hstrok;":"Ħ","HumpDownHump;":"≎","HumpEqual;":"≏","IEcy;":"Е","IJlig;":"IJ","IOcy;":"Ё","Iacute":"Í","Iacute;":"Í","Icirc":"Î","Icirc;":"Î","Icy;":"И","Idot;":"İ","Ifr;":"ℑ","Igrave":"Ì","Igrave;":"Ì","Im;":"ℑ","Imacr;":"Ī","ImaginaryI;":"ⅈ","Implies;":"⇒","Int;":"∬","Integral;":"∫","Intersection;":"⋂","InvisibleComma;":"⁣","InvisibleTimes;":"⁢","Iogon;":"Į","Iopf;":"𝕀","Iota;":"Ι","Iscr;":"ℐ","Itilde;":"Ĩ","Iukcy;":"І","Iuml":"Ï","Iuml;":"Ï","Jcirc;":"Ĵ","Jcy;":"Й","Jfr;":"𝔍","Jopf;":"𝕁","Jscr;":"𝒥","Jsercy;":"Ј","Jukcy;":"Є","KHcy;":"Х","KJcy;":"Ќ","Kappa;":"Κ","Kcedil;":"Ķ","Kcy;":"К","Kfr;":"𝔎","Kopf;":"𝕂","Kscr;":"𝒦","LJcy;":"Љ","LT":"<","LT;":"<","Lacute;":"Ĺ","Lambda;":"Λ","Lang;":"⟪","Laplacetrf;":
"ℒ","Larr;":"↞","Lcaron;":"Ľ","Lcedil;":"Ļ","Lcy;":"Л","LeftAngleBracket;":"⟨","LeftArrow;":"←","LeftArrowBar;":"⇤","LeftArrowRightArrow;":"⇆","LeftCeiling;":"⌈","LeftDoubleBracket;":"⟦","LeftDownTeeVector;":"⥡","LeftDownVector;":"⇃","LeftDownVectorBar;":"⥙","LeftFloor;":"⌊","LeftRightArrow;":"↔","LeftRightVector;":"⥎","LeftTee;":"⊣","LeftTeeArrow;":"↤","LeftTeeVector;":"⥚","LeftTriangle;":"⊲","LeftTriangleBar;":"⧏","LeftTriangleEqual;":"⊴","LeftUpDownVector;":"⥑","LeftUpTeeVector;":"⥠","LeftUpVector;":"↿","LeftUpVectorBar;":"⥘","LeftVector;":"↼","LeftVectorBar;":"⥒","Leftarrow;":"⇐","Leftrightarrow;":"⇔","LessEqualGreater;":"⋚","LessFullEqual;":"≦","LessGreater;":"≶","LessLess;":"⪡","LessSlantEqual;":"⩽","LessTilde;":"≲","Lfr;":"𝔏","Ll;":"⋘","Lleftarrow;":"⇚","Lmidot;":"Ŀ","LongLeftArrow;":"⟵","LongLeftRightArrow;":"⟷","LongRightArrow;":"⟶","Longleftarrow;":"⟸","Longleftrightarrow;":"⟺","Longrightarrow;":"⟹","Lopf;":"𝕃","LowerLeftArrow;":"↙","LowerRightArrow;":"↘","Lscr;":"ℒ","Lsh;":"↰","Lstrok;":"Ł","Lt;":"≪","Map;":"⤅","Mcy;":"М","MediumSpace;":" ","Mellintrf;":"ℳ","Mfr;":"𝔐","MinusPlus;":"∓","Mopf;":"𝕄","Mscr;":"ℳ","Mu;":"Μ","NJcy;":"Њ","Nacute;":"Ń","Ncaron;":"Ň","Ncedil;":"Ņ","Ncy;":"Н","NegativeMediumSpace;":"​","NegativeThickSpace;":"​","NegativeThinSpace;":"​","NegativeVeryThinSpace;":"​","NestedGreaterGreater;":"≫","NestedLessLess;":"≪","NewLine;":"\n","Nfr;":"𝔑","NoBreak;":"⁠","NonBreakingSpace;":" 
","Nopf;":"ℕ","Not;":"⫬","NotCongruent;":"≢","NotCupCap;":"≭","NotDoubleVerticalBar;":"∦","NotElement;":"∉","NotEqual;":"≠","NotEqualTilde;":"≂̸","NotExists;":"∄","NotGreater;":"≯","NotGreaterEqual;":"≱","NotGreaterFullEqual;":"≧̸","NotGreaterGreater;":"≫̸","NotGreaterLess;":"≹","NotGreaterSlantEqual;":"⩾̸","NotGreaterTilde;":"≵","NotHumpDownHump;":"≎̸","NotHumpEqual;":"≏̸","NotLeftTriangle;":"⋪","NotLeftTriangleBar;":"⧏̸","NotLeftTriangleEqual;":"⋬","NotLess;":"≮","NotLessEqual;":"≰","NotLessGreater;":"≸","NotLessLess;":"≪̸","NotLessSlantEqual;":"⩽̸","NotLessTilde;":"≴","NotNestedGreaterGreater;":"⪢̸","NotNestedLessLess;":"⪡̸","NotPrecedes;":"⊀","NotPrecedesEqual;":"⪯̸","NotPrecedesSlantEqual;":"⋠","NotReverseElement;":"∌","NotRightTriangle;":"⋫","NotRightTriangleBar;":"⧐̸","NotRightTriangleEqual;":"⋭","NotSquareSubset;":"⊏̸","NotSquareSubsetEqual;":"⋢","NotSquareSuperset;":"⊐̸","NotSquareSupersetEqual;":"⋣","NotSubset;":"⊂⃒","NotSubsetEqual;":"⊈","NotSucceeds;":"⊁","NotSucceedsEqual;":"⪰̸","NotSucceedsSlantEqual;":"⋡","NotSucceedsTilde;":"≿̸","NotSuperset;":"⊃⃒","NotSupersetEqual;":"⊉","NotTilde;":"≁","NotTildeEqual;":"≄","NotTildeFullEqual;":"≇","NotTildeTilde;":"≉","NotVerticalBar;":"∤","Nscr;":"𝒩","Ntilde":"Ñ","Ntilde;":"Ñ","Nu;":"Ν","OElig;":"Œ","Oacute":"Ó","Oacute;":"Ó","Ocirc":"Ô","Ocirc;":"Ô","Ocy;":"О","Odblac;":"Ő","Ofr;":"𝔒","Ograve":"Ò","Ograve;":"Ò","Omacr;":"Ō","Omega;":"Ω","Omicron;":"Ο","Oopf;":"𝕆","OpenCurlyDoubleQuote;":"“","OpenCurlyQuote;":"‘","Or;":"⩔","Oscr;":"𝒪","Oslash":"Ø","Oslash;":"Ø","Otilde":"Õ","Otilde;":"Õ","Otimes;":"⨷","Ouml":"Ö","Ouml;":"Ö","OverBar;":"‾","OverBrace;":"⏞","OverBracket;":"⎴","OverParenthesis;":"⏜","PartialD;":"∂","Pcy;":"П","Pfr;":"𝔓","Phi;":"Φ","Pi;":"Π","PlusMinus;":"±","Poincareplane;":"ℌ","Popf;":"ℙ","Pr;":"⪻","Precedes;":"≺","PrecedesEqual;":"⪯","PrecedesSlantEqual;":"≼","PrecedesTilde;":"≾","Prime;":"″","Product;":"∏","Proportion;":"∷","Proportional;":"∝","Pscr;":"𝒫","Psi;":"Ψ","QUOT":"\"","QUOT;":"\"","Qfr;"
:"𝔔","Qopf;":"ℚ","Qscr;":"𝒬","RBarr;":"⤐","REG":"®","REG;":"®","Racute;":"Ŕ","Rang;":"⟫","Rarr;":"↠","Rarrtl;":"⤖","Rcaron;":"Ř","Rcedil;":"Ŗ","Rcy;":"Р","Re;":"ℜ","ReverseElement;":"∋","ReverseEquilibrium;":"⇋","ReverseUpEquilibrium;":"⥯","Rfr;":"ℜ","Rho;":"Ρ","RightAngleBracket;":"⟩","RightArrow;":"→","RightArrowBar;":"⇥","RightArrowLeftArrow;":"⇄","RightCeiling;":"⌉","RightDoubleBracket;":"⟧","RightDownTeeVector;":"⥝","RightDownVector;":"⇂","RightDownVectorBar;":"⥕","RightFloor;":"⌋","RightTee;":"⊢","RightTeeArrow;":"↦","RightTeeVector;":"⥛","RightTriangle;":"⊳","RightTriangleBar;":"⧐","RightTriangleEqual;":"⊵","RightUpDownVector;":"⥏","RightUpTeeVector;":"⥜","RightUpVector;":"↾","RightUpVectorBar;":"⥔","RightVector;":"⇀","RightVectorBar;":"⥓","Rightarrow;":"⇒","Ropf;":"ℝ","RoundImplies;":"⥰","Rrightarrow;":"⇛","Rscr;":"ℛ","Rsh;":"↱","RuleDelayed;":"⧴","SHCHcy;":"Щ","SHcy;":"Ш","SOFTcy;":"Ь","Sacute;":"Ś","Sc;":"⪼","Scaron;":"Š","Scedil;":"Ş","Scirc;":"Ŝ","Scy;":"С","Sfr;":"𝔖","ShortDownArrow;":"↓","ShortLeftArrow;":"←","ShortRightArrow;":"→","ShortUpArrow;":"↑","Sigma;":"Σ","SmallCircle;":"∘","Sopf;":"𝕊","Sqrt;":"√","Square;":"□","SquareIntersection;":"⊓","SquareSubset;":"⊏","SquareSubsetEqual;":"⊑","SquareSuperset;":"⊐","SquareSupersetEqual;":"⊒","SquareUnion;":"⊔","Sscr;":"𝒮","Star;":"⋆","Sub;":"⋐","Subset;":"⋐","SubsetEqual;":"⊆","Succeeds;":"≻","SucceedsEqual;":"⪰","SucceedsSlantEqual;":"≽","SucceedsTilde;":"≿","SuchThat;":"∋","Sum;":"∑","Sup;":"⋑","Superset;":"⊃","SupersetEqual;":"⊇","Supset;":"⋑","THORN":"Þ","THORN;":"Þ","TRADE;":"™","TSHcy;":"Ћ","TScy;":"Ц","Tab;":"\t","Tau;":"Τ","Tcaron;":"Ť","Tcedil;":"Ţ","Tcy;":"Т","Tfr;":"𝔗","Therefore;":"∴","Theta;":"Θ","ThickSpace;":"  ","ThinSpace;":" 
","Tilde;":"∼","TildeEqual;":"≃","TildeFullEqual;":"≅","TildeTilde;":"≈","Topf;":"𝕋","TripleDot;":"⃛","Tscr;":"𝒯","Tstrok;":"Ŧ","Uacute":"Ú","Uacute;":"Ú","Uarr;":"↟","Uarrocir;":"⥉","Ubrcy;":"Ў","Ubreve;":"Ŭ","Ucirc":"Û","Ucirc;":"Û","Ucy;":"У","Udblac;":"Ű","Ufr;":"𝔘","Ugrave":"Ù","Ugrave;":"Ù","Umacr;":"Ū","UnderBar;":"_","UnderBrace;":"⏟","UnderBracket;":"⎵","UnderParenthesis;":"⏝","Union;":"⋃","UnionPlus;":"⊎","Uogon;":"Ų","Uopf;":"𝕌","UpArrow;":"↑","UpArrowBar;":"⤒","UpArrowDownArrow;":"⇅","UpDownArrow;":"↕","UpEquilibrium;":"⥮","UpTee;":"⊥","UpTeeArrow;":"↥","Uparrow;":"⇑","Updownarrow;":"⇕","UpperLeftArrow;":"↖","UpperRightArrow;":"↗","Upsi;":"ϒ","Upsilon;":"Υ","Uring;":"Ů","Uscr;":"𝒰","Utilde;":"Ũ","Uuml":"Ü","Uuml;":"Ü","VDash;":"⊫","Vbar;":"⫫","Vcy;":"В","Vdash;":"⊩","Vdashl;":"⫦","Vee;":"⋁","Verbar;":"‖","Vert;":"‖","VerticalBar;":"∣","VerticalLine;":"|","VerticalSeparator;":"❘","VerticalTilde;":"≀","VeryThinSpace;":" ","Vfr;":"𝔙","Vopf;":"𝕍","Vscr;":"𝒱","Vvdash;":"⊪","Wcirc;":"Ŵ","Wedge;":"⋀","Wfr;":"𝔚","Wopf;":"𝕎","Wscr;":"𝒲","Xfr;":"𝔛","Xi;":"Ξ","Xopf;":"𝕏","Xscr;":"𝒳","YAcy;":"Я","YIcy;":"Ї","YUcy;":"Ю","Yacute":"Ý","Yacute;":"Ý","Ycirc;":"Ŷ","Ycy;":"Ы","Yfr;":"𝔜","Yopf;":"𝕐","Yscr;":"𝒴","Yuml;":"Ÿ","ZHcy;":"Ж","Zacute;":"Ź","Zcaron;":"Ž","Zcy;":"З","Zdot;":"Ż","ZeroWidthSpace;":"​","Zeta;":"Ζ","Zfr;":"ℨ","Zopf;":"ℤ","Zscr;":"𝒵","aacute":"á","aacute;":"á","abreve;":"ă","ac;":"∾","acE;":"∾̳","acd;":"∿","acirc":"â","acirc;":"â","acute":"´","acute;":"´","acy;":"а","aelig":"æ","aelig;":"æ","af;":"⁡","afr;":"𝔞","agrave":"à","agrave;":"à","alefsym;":"ℵ","aleph;":"ℵ","alpha;":"α","amacr;":"ā","amalg;":"⨿","amp":"&","amp;":"&","and;":"∧","andand;":"⩕","andd;":"⩜","andslope;":"⩘","andv;":"⩚","ang;":"∠","ange;":"⦤","angle;":"∠","angmsd;":"∡","angmsdaa;":"⦨","angmsdab;":"⦩","angmsdac;":"⦪","angmsdad;":"⦫","angmsdae;":"⦬","angmsdaf;":"⦭","angmsdag;":"⦮","angmsdah;":"⦯","angrt;":"∟","angrtvb;":"⊾","angrtvbd;":"⦝","angsph;":"∢","angst;":"Å","angzarr;":"⍼","aogon;"
:"ą","aopf;":"𝕒","ap;":"≈","apE;":"⩰","apacir;":"⩯","ape;":"≊","apid;":"≋","apos;":"'","approx;":"≈","approxeq;":"≊","aring":"å","aring;":"å","ascr;":"𝒶","ast;":"*","asymp;":"≈","asympeq;":"≍","atilde":"ã","atilde;":"ã","auml":"ä","auml;":"ä","awconint;":"∳","awint;":"⨑","bNot;":"⫭","backcong;":"≌","backepsilon;":"϶","backprime;":"‵","backsim;":"∽","backsimeq;":"⋍","barvee;":"⊽","barwed;":"⌅","barwedge;":"⌅","bbrk;":"⎵","bbrktbrk;":"⎶","bcong;":"≌","bcy;":"б","bdquo;":"„","becaus;":"∵","because;":"∵","bemptyv;":"⦰","bepsi;":"϶","bernou;":"ℬ","beta;":"β","beth;":"ℶ","between;":"≬","bfr;":"𝔟","bigcap;":"⋂","bigcirc;":"◯","bigcup;":"⋃","bigodot;":"⨀","bigoplus;":"⨁","bigotimes;":"⨂","bigsqcup;":"⨆","bigstar;":"★","bigtriangledown;":"▽","bigtriangleup;":"△","biguplus;":"⨄","bigvee;":"⋁","bigwedge;":"⋀","bkarow;":"⤍","blacklozenge;":"⧫","blacksquare;":"▪","blacktriangle;":"▴","blacktriangledown;":"▾","blacktriangleleft;":"◂","blacktriangleright;":"▸","blank;":"␣","blk12;":"▒","blk14;":"░","blk34;":"▓","block;":"█","bne;":"=⃥","bnequiv;":"≡⃥","bnot;":"⌐","bopf;":"𝕓","bot;":"⊥","bottom;":"⊥","bowtie;":"⋈","boxDL;":"╗","boxDR;":"╔","boxDl;":"╖","boxDr;":"╓","boxH;":"═","boxHD;":"╦","boxHU;":"╩","boxHd;":"╤","boxHu;":"╧","boxUL;":"╝","boxUR;":"╚","boxUl;":"╜","boxUr;":"╙","boxV;":"║","boxVH;":"╬","boxVL;":"╣","boxVR;":"╠","boxVh;":"╫","boxVl;":"╢","boxVr;":"╟","boxbox;":"⧉","boxdL;":"╕","boxdR;":"╒","boxdl;":"┐","boxdr;":"┌","boxh;":"─","boxhD;":"╥","boxhU;":"╨","boxhd;":"┬","boxhu;":"┴","boxminus;":"⊟","boxplus;":"⊞","boxtimes;":"⊠","boxuL;":"╛","boxuR;":"╘","boxul;":"┘","boxur;":"└","boxv;":"│","boxvH;":"╪","boxvL;":"╡","boxvR;":"╞","boxvh;":"┼","boxvl;":"┤","boxvr;":"├","bprime;":"‵","breve;":"˘","brvbar":"¦","brvbar;":"¦","bscr;":"𝒷","bsemi;":"⁏","bsim;":"∽","bsime;":"⋍","bsol;":"\\","bsolb;":"⧅","bsolhsub;":"⟈","bull;":"•","bullet;":"•","bump;":"≎","bumpE;":"⪮","bumpe;":"≏","bumpeq;":"≏","cacute;":"ć","cap;":"∩","capand;":"⩄","capbrcup;":"⩉","capcap;":"⩋","capcup;":"⩇",
"capdot;":"⩀","caps;":"∩︀","caret;":"⁁","caron;":"ˇ","ccaps;":"⩍","ccaron;":"č","ccedil":"ç","ccedil;":"ç","ccirc;":"ĉ","ccups;":"⩌","ccupssm;":"⩐","cdot;":"ċ","cedil":"¸","cedil;":"¸","cemptyv;":"⦲","cent":"¢","cent;":"¢","centerdot;":"·","cfr;":"𝔠","chcy;":"ч","check;":"✓","checkmark;":"✓","chi;":"χ","cir;":"○","cirE;":"⧃","circ;":"ˆ","circeq;":"≗","circlearrowleft;":"↺","circlearrowright;":"↻","circledR;":"®","circledS;":"Ⓢ","circledast;":"⊛","circledcirc;":"⊚","circleddash;":"⊝","cire;":"≗","cirfnint;":"⨐","cirmid;":"⫯","cirscir;":"⧂","clubs;":"♣","clubsuit;":"♣","colon;":":","colone;":"≔","coloneq;":"≔","comma;":",","commat;":"@","comp;":"∁","compfn;":"∘","complement;":"∁","complexes;":"ℂ","cong;":"≅","congdot;":"⩭","conint;":"∮","copf;":"𝕔","coprod;":"∐","copy":"©","copy;":"©","copysr;":"℗","crarr;":"↵","cross;":"✗","cscr;":"𝒸","csub;":"⫏","csube;":"⫑","csup;":"⫐","csupe;":"⫒","ctdot;":"⋯","cudarrl;":"⤸","cudarrr;":"⤵","cuepr;":"⋞","cuesc;":"⋟","cularr;":"↶","cularrp;":"⤽","cup;":"∪","cupbrcap;":"⩈","cupcap;":"⩆","cupcup;":"⩊","cupdot;":"⊍","cupor;":"⩅","cups;":"∪︀","curarr;":"↷","curarrm;":"⤼","curlyeqprec;":"⋞","curlyeqsucc;":"⋟","curlyvee;":"⋎","curlywedge;":"⋏","curren":"¤","curren;":"¤","curvearrowleft;":"↶","curvearrowright;":"↷","cuvee;":"⋎","cuwed;":"⋏","cwconint;":"∲","cwint;":"∱","cylcty;":"⌭","dArr;":"⇓","dHar;":"⥥","dagger;":"†","daleth;":"ℸ","darr;":"↓","dash;":"‐","dashv;":"⊣","dbkarow;":"⤏","dblac;":"˝","dcaron;":"ď","dcy;":"д","dd;":"ⅆ","ddagger;":"‡","ddarr;":"⇊","ddotseq;":"⩷","deg":"°","deg;":"°","delta;":"δ","demptyv;":"⦱","dfisht;":"⥿","dfr;":"𝔡","dharl;":"⇃","dharr;":"⇂","diam;":"⋄","diamond;":"⋄","diamondsuit;":"♦","diams;":"♦","die;":"¨","digamma;":"ϝ","disin;":"⋲","div;":"÷","divide":"÷","divide;":"÷","divideontimes;":"⋇","divonx;":"⋇","djcy;":"ђ","dlcorn;":"⌞","dlcrop;":"⌍","dollar;":"$","dopf;":"𝕕","dot;":"˙","doteq;":"≐","doteqdot;":"≑","dotminus;":"∸","dotplus;":"∔","dotsquare;":"⊡","doublebarwedge;":"⌆","downarrow;":"↓","downdowna
rrows;":"⇊","downharpoonleft;":"⇃","downharpoonright;":"⇂","drbkarow;":"⤐","drcorn;":"⌟","drcrop;":"⌌","dscr;":"𝒹","dscy;":"ѕ","dsol;":"⧶","dstrok;":"đ","dtdot;":"⋱","dtri;":"▿","dtrif;":"▾","duarr;":"⇵","duhar;":"⥯","dwangle;":"⦦","dzcy;":"џ","dzigrarr;":"⟿","eDDot;":"⩷","eDot;":"≑","eacute":"é","eacute;":"é","easter;":"⩮","ecaron;":"ě","ecir;":"≖","ecirc":"ê","ecirc;":"ê","ecolon;":"≕","ecy;":"э","edot;":"ė","ee;":"ⅇ","efDot;":"≒","efr;":"𝔢","eg;":"⪚","egrave":"è","egrave;":"è","egs;":"⪖","egsdot;":"⪘","el;":"⪙","elinters;":"⏧","ell;":"ℓ","els;":"⪕","elsdot;":"⪗","emacr;":"ē","empty;":"∅","emptyset;":"∅","emptyv;":"∅","emsp13;":" ","emsp14;":" ","emsp;":" ","eng;":"ŋ","ensp;":" ","eogon;":"ę","eopf;":"𝕖","epar;":"⋕","eparsl;":"⧣","eplus;":"⩱","epsi;":"ε","epsilon;":"ε","epsiv;":"ϵ","eqcirc;":"≖","eqcolon;":"≕","eqsim;":"≂","eqslantgtr;":"⪖","eqslantless;":"⪕","equals;":"=","equest;":"≟","equiv;":"≡","equivDD;":"⩸","eqvparsl;":"⧥","erDot;":"≓","erarr;":"⥱","escr;":"ℯ","esdot;":"≐","esim;":"≂","eta;":"η","eth":"ð","eth;":"ð","euml":"ë","euml;":"ë","euro;":"€","excl;":"!","exist;":"∃","expectation;":"ℰ","exponentiale;":"ⅇ","fallingdotseq;":"≒","fcy;":"ф","female;":"♀","ffilig;":"ffi","fflig;":"ff","ffllig;":"ffl","ffr;":"𝔣","filig;":"fi","fjlig;":"fj","flat;":"♭","fllig;":"fl","fltns;":"▱","fnof;":"ƒ","fopf;":"𝕗","forall;":"∀","fork;":"⋔","forkv;":"⫙","fpartint;":"⨍","frac12":"½","frac12;":"½","frac13;":"⅓","frac14":"¼","frac14;":"¼","frac15;":"⅕","frac16;":"⅙","frac18;":"⅛","frac23;":"⅔","frac25;":"⅖","frac34":"¾","frac34;":"¾","frac35;":"⅗","frac38;":"⅜","frac45;":"⅘","frac56;":"⅚","frac58;":"⅝","frac78;":"⅞","frasl;":"⁄","frown;":"⌢","fscr;":"𝒻","gE;":"≧","gEl;":"⪌","gacute;":"ǵ","gamma;":"γ","gammad;":"ϝ","gap;":"⪆","gbreve;":"ğ","gcirc;":"ĝ","gcy;":"г","gdot;":"ġ","ge;":"≥","gel;":"⋛","geq;":"≥","geqq;":"≧","geqslant;":"⩾","ges;":"⩾","gescc;":"⪩","gesdot;":"⪀","gesdoto;":"⪂","gesdotol;":"⪄","gesl;":"⋛︀","gesles;":"⪔","gfr;":"𝔤","gg;":"≫","ggg;":"⋙","gimel;":"ℷ",
"gjcy;":"ѓ","gl;":"≷","glE;":"⪒","gla;":"⪥","glj;":"⪤","gnE;":"≩","gnap;":"⪊","gnapprox;":"⪊","gne;":"⪈","gneq;":"⪈","gneqq;":"≩","gnsim;":"⋧","gopf;":"𝕘","grave;":"`","gscr;":"ℊ","gsim;":"≳","gsime;":"⪎","gsiml;":"⪐","gt":">","gt;":">","gtcc;":"⪧","gtcir;":"⩺","gtdot;":"⋗","gtlPar;":"⦕","gtquest;":"⩼","gtrapprox;":"⪆","gtrarr;":"⥸","gtrdot;":"⋗","gtreqless;":"⋛","gtreqqless;":"⪌","gtrless;":"≷","gtrsim;":"≳","gvertneqq;":"≩︀","gvnE;":"≩︀","hArr;":"⇔","hairsp;":" ","half;":"½","hamilt;":"ℋ","hardcy;":"ъ","harr;":"↔","harrcir;":"⥈","harrw;":"↭","hbar;":"ℏ","hcirc;":"ĥ","hearts;":"♥","heartsuit;":"♥","hellip;":"…","hercon;":"⊹","hfr;":"𝔥","hksearow;":"⤥","hkswarow;":"⤦","hoarr;":"⇿","homtht;":"∻","hookleftarrow;":"↩","hookrightarrow;":"↪","hopf;":"𝕙","horbar;":"―","hscr;":"𝒽","hslash;":"ℏ","hstrok;":"ħ","hybull;":"⁃","hyphen;":"‐","iacute":"í","iacute;":"í","ic;":"⁣","icirc":"î","icirc;":"î","icy;":"и","iecy;":"е","iexcl":"¡","iexcl;":"¡","iff;":"⇔","ifr;":"𝔦","igrave":"ì","igrave;":"ì","ii;":"ⅈ","iiiint;":"⨌","iiint;":"∭","iinfin;":"⧜","iiota;":"℩","ijlig;":"ij","imacr;":"ī","image;":"ℑ","imagline;":"ℐ","imagpart;":"ℑ","imath;":"ı","imof;":"⊷","imped;":"Ƶ","in;":"∈","incare;":"℅","infin;":"∞","infintie;":"⧝","inodot;":"ı","int;":"∫","intcal;":"⊺","integers;":"ℤ","intercal;":"⊺","intlarhk;":"⨗","intprod;":"⨼","iocy;":"ё","iogon;":"į","iopf;":"𝕚","iota;":"ι","iprod;":"⨼","iquest":"¿","iquest;":"¿","iscr;":"𝒾","isin;":"∈","isinE;":"⋹","isindot;":"⋵","isins;":"⋴","isinsv;":"⋳","isinv;":"∈","it;":"⁢","itilde;":"ĩ","iukcy;":"і","iuml":"ï","iuml;":"ï","jcirc;":"ĵ","jcy;":"й","jfr;":"𝔧","jmath;":"ȷ","jopf;":"𝕛","jscr;":"𝒿","jsercy;":"ј","jukcy;":"є","kappa;":"κ","kappav;":"ϰ","kcedil;":"ķ","kcy;":"к","kfr;":"𝔨","kgreen;":"ĸ","khcy;":"х","kjcy;":"ќ","kopf;":"𝕜","kscr;":"𝓀","lAarr;":"⇚","lArr;":"⇐","lAtail;":"⤛","lBarr;":"⤎","lE;":"≦","lEg;":"⪋","lHar;":"⥢","lacute;":"ĺ","laemptyv;":"⦴","lagran;":"ℒ","lambda;":"λ","lang;":"⟨","langd;":"⦑","langle;":"⟨","lap;":"⪅","laquo":"«","
laquo;":"«","larr;":"←","larrb;":"⇤","larrbfs;":"⤟","larrfs;":"⤝","larrhk;":"↩","larrlp;":"↫","larrpl;":"⤹","larrsim;":"⥳","larrtl;":"↢","lat;":"⪫","latail;":"⤙","late;":"⪭","lates;":"⪭︀","lbarr;":"⤌","lbbrk;":"❲","lbrace;":"{","lbrack;":"[","lbrke;":"⦋","lbrksld;":"⦏","lbrkslu;":"⦍","lcaron;":"ľ","lcedil;":"ļ","lceil;":"⌈","lcub;":"{","lcy;":"л","ldca;":"⤶","ldquo;":"“","ldquor;":"„","ldrdhar;":"⥧","ldrushar;":"⥋","ldsh;":"↲","le;":"≤","leftarrow;":"←","leftarrowtail;":"↢","leftharpoondown;":"↽","leftharpoonup;":"↼","leftleftarrows;":"⇇","leftrightarrow;":"↔","leftrightarrows;":"⇆","leftrightharpoons;":"⇋","leftrightsquigarrow;":"↭","leftthreetimes;":"⋋","leg;":"⋚","leq;":"≤","leqq;":"≦","leqslant;":"⩽","les;":"⩽","lescc;":"⪨","lesdot;":"⩿","lesdoto;":"⪁","lesdotor;":"⪃","lesg;":"⋚︀","lesges;":"⪓","lessapprox;":"⪅","lessdot;":"⋖","lesseqgtr;":"⋚","lesseqqgtr;":"⪋","lessgtr;":"≶","lesssim;":"≲","lfisht;":"⥼","lfloor;":"⌊","lfr;":"𝔩","lg;":"≶","lgE;":"⪑","lhard;":"↽","lharu;":"↼","lharul;":"⥪","lhblk;":"▄","ljcy;":"љ","ll;":"≪","llarr;":"⇇","llcorner;":"⌞","llhard;":"⥫","lltri;":"◺","lmidot;":"ŀ","lmoust;":"⎰","lmoustache;":"⎰","lnE;":"≨","lnap;":"⪉","lnapprox;":"⪉","lne;":"⪇","lneq;":"⪇","lneqq;":"≨","lnsim;":"⋦","loang;":"⟬","loarr;":"⇽","lobrk;":"⟦","longleftarrow;":"⟵","longleftrightarrow;":"⟷","longmapsto;":"⟼","longrightarrow;":"⟶","looparrowleft;":"↫","looparrowright;":"↬","lopar;":"⦅","lopf;":"𝕝","loplus;":"⨭","lotimes;":"⨴","lowast;":"∗","lowbar;":"_","loz;":"◊","lozenge;":"◊","lozf;":"⧫","lpar;":"(","lparlt;":"⦓","lrarr;":"⇆","lrcorner;":"⌟","lrhar;":"⇋","lrhard;":"⥭","lrm;":"‎","lrtri;":"⊿","lsaquo;":"‹","lscr;":"𝓁","lsh;":"↰","lsim;":"≲","lsime;":"⪍","lsimg;":"⪏","lsqb;":"[","lsquo;":"‘","lsquor;":"‚","lstrok;":"ł","lt":"<","lt;":"<","ltcc;":"⪦","ltcir;":"⩹","ltdot;":"⋖","lthree;":"⋋","ltimes;":"⋉","ltlarr;":"⥶","ltquest;":"⩻","ltrPar;":"⦖","ltri;":"◃","ltrie;":"⊴","ltrif;":"◂","lurdshar;":"⥊","luruhar;":"⥦","lvertneqq;":"≨︀","lvnE;":"≨︀","mDDot;":"∺","ma
cr":"¯","macr;":"¯","male;":"♂","malt;":"✠","maltese;":"✠","map;":"↦","mapsto;":"↦","mapstodown;":"↧","mapstoleft;":"↤","mapstoup;":"↥","marker;":"▮","mcomma;":"⨩","mcy;":"м","mdash;":"—","measuredangle;":"∡","mfr;":"𝔪","mho;":"℧","micro":"µ","micro;":"µ","mid;":"∣","midast;":"*","midcir;":"⫰","middot":"·","middot;":"·","minus;":"−","minusb;":"⊟","minusd;":"∸","minusdu;":"⨪","mlcp;":"⫛","mldr;":"…","mnplus;":"∓","models;":"⊧","mopf;":"𝕞","mp;":"∓","mscr;":"𝓂","mstpos;":"∾","mu;":"μ","multimap;":"⊸","mumap;":"⊸","nGg;":"⋙̸","nGt;":"≫⃒","nGtv;":"≫̸","nLeftarrow;":"⇍","nLeftrightarrow;":"⇎","nLl;":"⋘̸","nLt;":"≪⃒","nLtv;":"≪̸","nRightarrow;":"⇏","nVDash;":"⊯","nVdash;":"⊮","nabla;":"∇","nacute;":"ń","nang;":"∠⃒","nap;":"≉","napE;":"⩰̸","napid;":"≋̸","napos;":"ʼn","napprox;":"≉","natur;":"♮","natural;":"♮","naturals;":"ℕ","nbsp":" ","nbsp;":" ","nbump;":"≎̸","nbumpe;":"≏̸","ncap;":"⩃","ncaron;":"ň","ncedil;":"ņ","ncong;":"≇","ncongdot;":"⩭̸","ncup;":"⩂","ncy;":"н","ndash;":"–","ne;":"≠","neArr;":"⇗","nearhk;":"⤤","nearr;":"↗","nearrow;":"↗","nedot;":"≐̸","nequiv;":"≢","nesear;":"⤨","nesim;":"≂̸","nexist;":"∄","nexists;":"∄","nfr;":"𝔫","ngE;":"≧̸","nge;":"≱","ngeq;":"≱","ngeqq;":"≧̸","ngeqslant;":"⩾̸","nges;":"⩾̸","ngsim;":"≵","ngt;":"≯","ngtr;":"≯","nhArr;":"⇎","nharr;":"↮","nhpar;":"⫲","ni;":"∋","nis;":"⋼","nisd;":"⋺","niv;":"∋","njcy;":"њ","nlArr;":"⇍","nlE;":"≦̸","nlarr;":"↚","nldr;":"‥","nle;":"≰","nleftarrow;":"↚","nleftrightarrow;":"↮","nleq;":"≰","nleqq;":"≦̸","nleqslant;":"⩽̸","nles;":"⩽̸","nless;":"≮","nlsim;":"≴","nlt;":"≮","nltri;":"⋪","nltrie;":"⋬","nmid;":"∤","nopf;":"𝕟","not":"¬","not;":"¬","notin;":"∉","notinE;":"⋹̸","notindot;":"⋵̸","notinva;":"∉","notinvb;":"⋷","notinvc;":"⋶","notni;":"∌","notniva;":"∌","notnivb;":"⋾","notnivc;":"⋽","npar;":"∦","nparallel;":"∦","nparsl;":"⫽⃥","npart;":"∂̸","npolint;":"⨔","npr;":"⊀","nprcue;":"⋠","npre;":"⪯̸","nprec;":"⊀","npreceq;":"⪯̸","nrArr;":"⇏","nrarr;":"↛","nrarrc;":"⤳̸","nrarrw;":"↝̸","nrightarrow;":"↛","nrtri;":
"⋫","nrtrie;":"⋭","nsc;":"⊁","nsccue;":"⋡","nsce;":"⪰̸","nscr;":"𝓃","nshortmid;":"∤","nshortparallel;":"∦","nsim;":"≁","nsime;":"≄","nsimeq;":"≄","nsmid;":"∤","nspar;":"∦","nsqsube;":"⋢","nsqsupe;":"⋣","nsub;":"⊄","nsubE;":"⫅̸","nsube;":"⊈","nsubset;":"⊂⃒","nsubseteq;":"⊈","nsubseteqq;":"⫅̸","nsucc;":"⊁","nsucceq;":"⪰̸","nsup;":"⊅","nsupE;":"⫆̸","nsupe;":"⊉","nsupset;":"⊃⃒","nsupseteq;":"⊉","nsupseteqq;":"⫆̸","ntgl;":"≹","ntilde":"ñ","ntilde;":"ñ","ntlg;":"≸","ntriangleleft;":"⋪","ntrianglelefteq;":"⋬","ntriangleright;":"⋫","ntrianglerighteq;":"⋭","nu;":"ν","num;":"#","numero;":"№","numsp;":" ","nvDash;":"⊭","nvHarr;":"⤄","nvap;":"≍⃒","nvdash;":"⊬","nvge;":"≥⃒","nvgt;":">⃒","nvinfin;":"⧞","nvlArr;":"⤂","nvle;":"≤⃒","nvlt;":"<⃒","nvltrie;":"⊴⃒","nvrArr;":"⤃","nvrtrie;":"⊵⃒","nvsim;":"∼⃒","nwArr;":"⇖","nwarhk;":"⤣","nwarr;":"↖","nwarrow;":"↖","nwnear;":"⤧","oS;":"Ⓢ","oacute":"ó","oacute;":"ó","oast;":"⊛","ocir;":"⊚","ocirc":"ô","ocirc;":"ô","ocy;":"о","odash;":"⊝","odblac;":"ő","odiv;":"⨸","odot;":"⊙","odsold;":"⦼","oelig;":"œ","ofcir;":"⦿","ofr;":"𝔬","ogon;":"˛","ograve":"ò","ograve;":"ò","ogt;":"⧁","ohbar;":"⦵","ohm;":"Ω","oint;":"∮","olarr;":"↺","olcir;":"⦾","olcross;":"⦻","oline;":"‾","olt;":"⧀","omacr;":"ō","omega;":"ω","omicron;":"ο","omid;":"⦶","ominus;":"⊖","oopf;":"𝕠","opar;":"⦷","operp;":"⦹","oplus;":"⊕","or;":"∨","orarr;":"↻","ord;":"⩝","order;":"ℴ","orderof;":"ℴ","ordf":"ª","ordf;":"ª","ordm":"º","ordm;":"º","origof;":"⊶","oror;":"⩖","orslope;":"⩗","orv;":"⩛","oscr;":"ℴ","oslash":"ø","oslash;":"ø","osol;":"⊘","otilde":"õ","otilde;":"õ","otimes;":"⊗","otimesas;":"⨶","ouml":"ö","ouml;":"ö","ovbar;":"⌽","par;":"∥","para":"¶","para;":"¶","parallel;":"∥","parsim;":"⫳","parsl;":"⫽","part;":"∂","pcy;":"п","percnt;":"%","period;":".","permil;":"‰","perp;":"⊥","pertenk;":"‱","pfr;":"𝔭","phi;":"φ","phiv;":"ϕ","phmmat;":"ℳ","phone;":"☎","pi;":"π","pitchfork;":"⋔","piv;":"ϖ","planck;":"ℏ","planckh;":"ℎ","plankv;":"ℏ","plus;":"+","plusacir;":"⨣","plusb;":"⊞","pluscir;"
:"⨢","plusdo;":"∔","plusdu;":"⨥","pluse;":"⩲","plusmn":"±","plusmn;":"±","plussim;":"⨦","plustwo;":"⨧","pm;":"±","pointint;":"⨕","popf;":"𝕡","pound":"£","pound;":"£","pr;":"≺","prE;":"⪳","prap;":"⪷","prcue;":"≼","pre;":"⪯","prec;":"≺","precapprox;":"⪷","preccurlyeq;":"≼","preceq;":"⪯","precnapprox;":"⪹","precneqq;":"⪵","precnsim;":"⋨","precsim;":"≾","prime;":"′","primes;":"ℙ","prnE;":"⪵","prnap;":"⪹","prnsim;":"⋨","prod;":"∏","profalar;":"⌮","profline;":"⌒","profsurf;":"⌓","prop;":"∝","propto;":"∝","prsim;":"≾","prurel;":"⊰","pscr;":"𝓅","psi;":"ψ","puncsp;":" ","qfr;":"𝔮","qint;":"⨌","qopf;":"𝕢","qprime;":"⁗","qscr;":"𝓆","quaternions;":"ℍ","quatint;":"⨖","quest;":"?","questeq;":"≟","quot":"\"","quot;":"\"","rAarr;":"⇛","rArr;":"⇒","rAtail;":"⤜","rBarr;":"⤏","rHar;":"⥤","race;":"∽̱","racute;":"ŕ","radic;":"√","raemptyv;":"⦳","rang;":"⟩","rangd;":"⦒","range;":"⦥","rangle;":"⟩","raquo":"»","raquo;":"»","rarr;":"→","rarrap;":"⥵","rarrb;":"⇥","rarrbfs;":"⤠","rarrc;":"⤳","rarrfs;":"⤞","rarrhk;":"↪","rarrlp;":"↬","rarrpl;":"⥅","rarrsim;":"⥴","rarrtl;":"↣","rarrw;":"↝","ratail;":"⤚","ratio;":"∶","rationals;":"ℚ","rbarr;":"⤍","rbbrk;":"❳","rbrace;":"}","rbrack;":"]","rbrke;":"⦌","rbrksld;":"⦎","rbrkslu;":"⦐","rcaron;":"ř","rcedil;":"ŗ","rceil;":"⌉","rcub;":"}","rcy;":"р","rdca;":"⤷","rdldhar;":"⥩","rdquo;":"”","rdquor;":"”","rdsh;":"↳","real;":"ℜ","realine;":"ℛ","realpart;":"ℜ","reals;":"ℝ","rect;":"▭","reg":"®","reg;":"®","rfisht;":"⥽","rfloor;":"⌋","rfr;":"𝔯","rhard;":"⇁","rharu;":"⇀","rharul;":"⥬","rho;":"ρ","rhov;":"ϱ","rightarrow;":"→","rightarrowtail;":"↣","rightharpoondown;":"⇁","rightharpoonup;":"⇀","rightleftarrows;":"⇄","rightleftharpoons;":"⇌","rightrightarrows;":"⇉","rightsquigarrow;":"↝","rightthreetimes;":"⋌","ring;":"˚","risingdotseq;":"≓","rlarr;":"⇄","rlhar;":"⇌","rlm;":"‏","rmoust;":"⎱","rmoustache;":"⎱","rnmid;":"⫮","roang;":"⟭","roarr;":"⇾","robrk;":"⟧","ropar;":"⦆","ropf;":"𝕣","roplus;":"⨮","rotimes;":"⨵","rpar;":")","rpargt;":"⦔","rppolint;":"⨒","rrarr;
":"⇉","rsaquo;":"›","rscr;":"𝓇","rsh;":"↱","rsqb;":"]","rsquo;":"’","rsquor;":"’","rthree;":"⋌","rtimes;":"⋊","rtri;":"▹","rtrie;":"⊵","rtrif;":"▸","rtriltri;":"⧎","ruluhar;":"⥨","rx;":"℞","sacute;":"ś","sbquo;":"‚","sc;":"≻","scE;":"⪴","scap;":"⪸","scaron;":"š","sccue;":"≽","sce;":"⪰","scedil;":"ş","scirc;":"ŝ","scnE;":"⪶","scnap;":"⪺","scnsim;":"⋩","scpolint;":"⨓","scsim;":"≿","scy;":"с","sdot;":"⋅","sdotb;":"⊡","sdote;":"⩦","seArr;":"⇘","searhk;":"⤥","searr;":"↘","searrow;":"↘","sect":"§","sect;":"§","semi;":";","seswar;":"⤩","setminus;":"∖","setmn;":"∖","sext;":"✶","sfr;":"𝔰","sfrown;":"⌢","sharp;":"♯","shchcy;":"щ","shcy;":"ш","shortmid;":"∣","shortparallel;":"∥","shy":"­","shy;":"­","sigma;":"σ","sigmaf;":"ς","sigmav;":"ς","sim;":"∼","simdot;":"⩪","sime;":"≃","simeq;":"≃","simg;":"⪞","simgE;":"⪠","siml;":"⪝","simlE;":"⪟","simne;":"≆","simplus;":"⨤","simrarr;":"⥲","slarr;":"←","smallsetminus;":"∖","smashp;":"⨳","smeparsl;":"⧤","smid;":"∣","smile;":"⌣","smt;":"⪪","smte;":"⪬","smtes;":"⪬︀","softcy;":"ь","sol;":"/","solb;":"⧄","solbar;":"⌿","sopf;":"𝕤","spades;":"♠","spadesuit;":"♠","spar;":"∥","sqcap;":"⊓","sqcaps;":"⊓︀","sqcup;":"⊔","sqcups;":"⊔︀","sqsub;":"⊏","sqsube;":"⊑","sqsubset;":"⊏","sqsubseteq;":"⊑","sqsup;":"⊐","sqsupe;":"⊒","sqsupset;":"⊐","sqsupseteq;":"⊒","squ;":"□","square;":"□","squarf;":"▪","squf;":"▪","srarr;":"→","sscr;":"𝓈","ssetmn;":"∖","ssmile;":"⌣","sstarf;":"⋆","star;":"☆","starf;":"★","straightepsilon;":"ϵ","straightphi;":"ϕ","strns;":"¯","sub;":"⊂","subE;":"⫅","subdot;":"⪽","sube;":"⊆","subedot;":"⫃","submult;":"⫁","subnE;":"⫋","subne;":"⊊","subplus;":"⪿","subrarr;":"⥹","subset;":"⊂","subseteq;":"⊆","subseteqq;":"⫅","subsetneq;":"⊊","subsetneqq;":"⫋","subsim;":"⫇","subsub;":"⫕","subsup;":"⫓","succ;":"≻","succapprox;":"⪸","succcurlyeq;":"≽","succeq;":"⪰","succnapprox;":"⪺","succneqq;":"⪶","succnsim;":"⋩","succsim;":"≿","sum;":"∑","sung;":"♪","sup1":"¹","sup1;":"¹","sup2":"²","sup2;":"²","sup3":"³","sup3;":"³","sup;":"⊃","supE;":"⫆","supdot
;":"⪾","supdsub;":"⫘","supe;":"⊇","supedot;":"⫄","suphsol;":"⟉","suphsub;":"⫗","suplarr;":"⥻","supmult;":"⫂","supnE;":"⫌","supne;":"⊋","supplus;":"⫀","supset;":"⊃","supseteq;":"⊇","supseteqq;":"⫆","supsetneq;":"⊋","supsetneqq;":"⫌","supsim;":"⫈","supsub;":"⫔","supsup;":"⫖","swArr;":"⇙","swarhk;":"⤦","swarr;":"↙","swarrow;":"↙","swnwar;":"⤪","szlig":"ß","szlig;":"ß","target;":"⌖","tau;":"τ","tbrk;":"⎴","tcaron;":"ť","tcedil;":"ţ","tcy;":"т","tdot;":"⃛","telrec;":"⌕","tfr;":"𝔱","there4;":"∴","therefore;":"∴","theta;":"θ","thetasym;":"ϑ","thetav;":"ϑ","thickapprox;":"≈","thicksim;":"∼","thinsp;":" ","thkap;":"≈","thksim;":"∼","thorn":"þ","thorn;":"þ","tilde;":"˜","times":"×","times;":"×","timesb;":"⊠","timesbar;":"⨱","timesd;":"⨰","tint;":"∭","toea;":"⤨","top;":"⊤","topbot;":"⌶","topcir;":"⫱","topf;":"𝕥","topfork;":"⫚","tosa;":"⤩","tprime;":"‴","trade;":"™","triangle;":"▵","triangledown;":"▿","triangleleft;":"◃","trianglelefteq;":"⊴","triangleq;":"≜","triangleright;":"▹","trianglerighteq;":"⊵","tridot;":"◬","trie;":"≜","triminus;":"⨺","triplus;":"⨹","trisb;":"⧍","tritime;":"⨻","trpezium;":"⏢","tscr;":"𝓉","tscy;":"ц","tshcy;":"ћ","tstrok;":"ŧ","twixt;":"≬","twoheadleftarrow;":"↞","twoheadrightarrow;":"↠","uArr;":"⇑","uHar;":"⥣","uacute":"ú","uacute;":"ú","uarr;":"↑","ubrcy;":"ў","ubreve;":"ŭ","ucirc":"û","ucirc;":"û","ucy;":"у","udarr;":"⇅","udblac;":"ű","udhar;":"⥮","ufisht;":"⥾","ufr;":"𝔲","ugrave":"ù","ugrave;":"ù","uharl;":"↿","uharr;":"↾","uhblk;":"▀","ulcorn;":"⌜","ulcorner;":"⌜","ulcrop;":"⌏","ultri;":"◸","umacr;":"ū","uml":"¨","uml;":"¨","uogon;":"ų","uopf;":"𝕦","uparrow;":"↑","updownarrow;":"↕","upharpoonleft;":"↿","upharpoonright;":"↾","uplus;":"⊎","upsi;":"υ","upsih;":"ϒ","upsilon;":"υ","upuparrows;":"⇈","urcorn;":"⌝","urcorner;":"⌝","urcrop;":"⌎","uring;":"ů","urtri;":"◹","uscr;":"𝓊","utdot;":"⋰","utilde;":"ũ","utri;":"▵","utrif;":"▴","uuarr;":"⇈","uuml":"ü","uuml;":"ü","uwangle;":"⦧","vArr;":"⇕","vBar;":"⫨","vBarv;":"⫩","vDash;":"⊨","vangrt;":"⦜","varepsilo
n;":"ϵ","varkappa;":"ϰ","varnothing;":"∅","varphi;":"ϕ","varpi;":"ϖ","varpropto;":"∝","varr;":"↕","varrho;":"ϱ","varsigma;":"ς","varsubsetneq;":"⊊︀","varsubsetneqq;":"⫋︀","varsupsetneq;":"⊋︀","varsupsetneqq;":"⫌︀","vartheta;":"ϑ","vartriangleleft;":"⊲","vartriangleright;":"⊳","vcy;":"в","vdash;":"⊢","vee;":"∨","veebar;":"⊻","veeeq;":"≚","vellip;":"⋮","verbar;":"|","vert;":"|","vfr;":"𝔳","vltri;":"⊲","vnsub;":"⊂⃒","vnsup;":"⊃⃒","vopf;":"𝕧","vprop;":"∝","vrtri;":"⊳","vscr;":"𝓋","vsubnE;":"⫋︀","vsubne;":"⊊︀","vsupnE;":"⫌︀","vsupne;":"⊋︀","vzigzag;":"⦚","wcirc;":"ŵ","wedbar;":"⩟","wedge;":"∧","wedgeq;":"≙","weierp;":"℘","wfr;":"𝔴","wopf;":"𝕨","wp;":"℘","wr;":"≀","wreath;":"≀","wscr;":"𝓌","xcap;":"⋂","xcirc;":"◯","xcup;":"⋃","xdtri;":"▽","xfr;":"𝔵","xhArr;":"⟺","xharr;":"⟷","xi;":"ξ","xlArr;":"⟸","xlarr;":"⟵","xmap;":"⟼","xnis;":"⋻","xodot;":"⨀","xopf;":"𝕩","xoplus;":"⨁","xotime;":"⨂","xrArr;":"⟹","xrarr;":"⟶","xscr;":"𝓍","xsqcup;":"⨆","xuplus;":"⨄","xutri;":"△","xvee;":"⋁","xwedge;":"⋀","yacute":"ý","yacute;":"ý","yacy;":"я","ycirc;":"ŷ","ycy;":"ы","yen":"¥","yen;":"¥","yfr;":"𝔶","yicy;":"ї","yopf;":"𝕪","yscr;":"𝓎","yucy;":"ю","yuml":"ÿ","yuml;":"ÿ","zacute;":"ź","zcaron;":"ž","zcy;":"з","zdot;":"ż","zeetrf;":"ℨ","zeta;":"ζ","zfr;":"𝔷","zhcy;":"ж","zigrarr;":"⇝","zopf;":"𝕫","zscr;":"𝓏","zwj;":"‍","zwnj;":"‌"})));
26
+ // #endregion
9
27
 
10
28
  const STATE_DATA = 0;
11
29
  const STATE_TAG_OPEN = 1;
@@ -129,6 +147,13 @@ const QUOTE_DOUBLE = 1;
129
147
  const QUOTE_SINGLE = 2;
130
148
  const QUOTE_NONE = 0;
131
149
 
150
+ // Longest WHATWG named entity name *including* the trailing `;` is 32 chars
151
+ // (`CounterClockwiseContourIntegral;`); without the trailing `;` it's 31.
152
+ // Used to cap both the tokenizer's named-character-reference run length and
153
+ // the decoder's longest-prefix backtrack so pathological inputs (e.g. `&`
154
+ // followed by thousands of alphanumerics) stay linear-time.
155
+ const MAX_ENTITY_NAME_LEN = 32;
156
+
132
157
  /**
133
158
  * @param {number} cc character code
134
159
  * @returns {boolean} is ascii alpha
@@ -165,6 +190,19 @@ const isAsciiHexDigit = (cc) =>
165
190
  const isSpace = (cc) =>
166
191
  cc === CC_TAB || cc === CC_LF || cc === CC_FF || cc === CC_SPACE;
167
192
 
193
+ /**
194
+ * Severity of a tokenizer-detected parse error. `"warning"` is recoverable
195
+ * (the tokenizer continued and the emitted token is still well-formed, e.g.
196
+ * missing-attribute-value); `"error"` means the emitted token's offset
197
+ * range is incomplete or does not match what the spec would produce, e.g.
198
+ * eof-in-tag.
199
+ *
200
+ * Token offsets are JS string indices (UTF-16 code-unit offsets into
201
+ * `input`), not byte offsets — relevant for inputs containing non-BMP
202
+ * code points where one code point spans two indices.
203
+ * @typedef {"warning" | "error"} ParseErrorSeverity
204
+ */
205
+
168
206
  /**
169
207
  * @typedef {object} HtmlTokenCallbacks
170
208
  * @property {(input: string, start: number, end: number, nameStart: number, nameEnd: number, selfClosing: boolean) => number=} openTag
@@ -173,6 +211,7 @@ const isSpace = (cc) =>
173
211
  * @property {(input: string, nameStart: number, nameEnd: number, valueStart: number, valueEnd: number, quoteType: number) => number=} attribute
174
212
  * @property {(input: string, start: number, end: number) => number=} comment
175
213
  * @property {(input: string, start: number, end: number) => number=} doctype
214
+ * @property {(input: string, code: string, start: number, end: number, severity: ParseErrorSeverity) => void=} parseError
176
215
  */
177
216
 
178
217
  /**
@@ -196,8 +235,32 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
196
235
  let attrQuoteType = QUOTE_NONE;
197
236
  let commentStart = pos;
198
237
  let lastOpenTagName = "";
199
- let tempBuffer = "";
238
+ // Counter used by SCRIPT_DATA_DOUBLE_ESCAPE_{START,END} to detect whether
239
+ // the ASCII-alpha run after `<` / `</` spells exactly `"script"`. Values
240
+ // 0..6 = number of chars matched so far; 7 = no longer matches (sentinel).
241
+ // Avoids growing a buffer for pathological inputs with long alpha runs.
242
+ let scriptMatch = 0;
200
243
  let namedEntityConsumed = 0;
244
+ // Tracks whether the current tag has parsed any attributes — used to
245
+ // fire the `end-tag-with-attributes` parse error when an end tag emits.
246
+ let tagHasAttributes = false;
247
+
248
+ /**
249
+ * Reports a tokenizer parse error to the consumer. The offset range and
250
+ * severity follow the WHATWG spec naming. Severity is `"error"` for
251
+ * cases where the emitted token is incomplete (EOF inside a tag or
252
+ * comment); everything else is a `"warning"`. Offsets are JS string
253
+ * indices (UTF-16 code-unit offsets into `input`).
254
+ * @param {string} code WHATWG parse-error code (kebab-case)
255
+ * @param {number} start string offset where the error starts
256
+ * @param {number} end string offset where the error ends
257
+ * @param {ParseErrorSeverity} severity error severity
258
+ */
259
+ const reportError = (code, start, end, severity) => {
260
+ if (callbacks.parseError !== undefined) {
261
+ callbacks.parseError(input, code, start, end, severity);
262
+ }
263
+ };
201
264
 
202
265
  /**
203
266
  * @param {number} cc character code
@@ -239,8 +302,16 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
239
302
  * @param {number} endPos end position
240
303
  */
241
304
  const flushText = (endPos) => {
242
- if (textStart < endPos && callbacks.text !== undefined) {
243
- callbacks.text(input, textStart, endPos);
305
+ if (textStart < endPos) {
306
+ if (callbacks.text !== undefined) {
307
+ callbacks.text(input, textStart, endPos);
308
+ }
309
+ // Advance `textStart` so a second `flushText` for the same span
310
+ // (e.g. from the EOF handler after a tag-open transition already
311
+ // flushed the pending text) is a no-op rather than a duplicate
312
+ // emit. emitOpenTag / emitCloseTag overwrite `textStart` with
313
+ // their own `nextPos` anyway, so this doesn't shift their start.
314
+ textStart = endPos;
244
315
  }
245
316
  };
246
317
 
@@ -249,7 +320,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
249
320
  * @returns {number} next position
250
321
  */
251
322
  const emitAttribute = (endPos) => {
252
- let nextPos = endPos;
323
+ // Default `nextPos` advances past the closing quote (if any) so the
324
+ // state machine can continue when no `attribute` callback is provided.
325
+ // When a callback IS provided, its return value overrides the default —
326
+ // the callback is expected to do the same advance based on the
327
+ // reported `quoteType`.
328
+ let nextPos = attrQuoteType === QUOTE_NONE ? endPos : endPos + 1;
253
329
  if (callbacks.attribute !== undefined && attrNameStart !== -1) {
254
330
  nextPos = callbacks.attribute(
255
331
  input,
@@ -260,6 +336,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
260
336
  attrQuoteType
261
337
  );
262
338
  }
339
+ if (attrNameStart !== -1) tagHasAttributes = true;
263
340
  attrNameStart = -1;
264
341
  attrValueStart = -1;
265
342
  attrQuoteType = QUOTE_NONE;
@@ -286,6 +363,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
286
363
  if (!selfClosing) {
287
364
  lastOpenTagName = input.slice(tagNameStart, tagNameEnd).toLowerCase();
288
365
  }
366
+ tagHasAttributes = false;
289
367
  textStart = nextPos;
290
368
  return nextPos;
291
369
  };
@@ -295,6 +373,10 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
295
373
  * @returns {number} next position
296
374
  */
297
375
  const emitCloseTag = (endPos) => {
376
+ // Per WHATWG: an end tag emitted with attributes is a parse error.
377
+ if (tagHasAttributes) {
378
+ reportError("end-tag-with-attributes", tagStart, endPos, "warning");
379
+ }
298
380
  let nextPos = endPos;
299
381
  if (callbacks.closeTag !== undefined) {
300
382
  nextPos = callbacks.closeTag(
@@ -305,6 +387,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
305
387
  tagNameEnd
306
388
  );
307
389
  }
390
+ tagHasAttributes = false;
308
391
  textStart = nextPos;
309
392
  return nextPos;
310
393
  };
@@ -348,6 +431,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
348
431
  // U+0021 EXCLAMATION MARK (!)
349
432
  // Switch to the markup declaration open state.
350
433
  flushText(tagStart);
434
+ commentStart = tagStart;
351
435
  state = STATE_MARKUP_DECLARATION_OPEN;
352
436
  pos++;
353
437
  } else if (isAsciiAlpha(cc)) {
@@ -363,14 +447,27 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
363
447
  // This is an unexpected-question-mark-instead-of-tag-name parse error.
364
448
  // Create a comment token whose data is the empty string. Reconsume in the
365
449
  // bogus comment state.
450
+ reportError(
451
+ "unexpected-question-mark-instead-of-tag-name",
452
+ pos,
453
+ pos + 1,
454
+ "warning"
455
+ );
366
456
  flushText(tagStart);
367
457
  commentStart = tagStart;
368
458
  state = STATE_BOGUS_COMMENT;
369
- pos++;
459
+ // Reconsume — let the bogus-comment state consume the `?`
460
+ // itself, matching the spec.
461
+ } else {
370
462
  // Anything else
371
463
  // This is an invalid-first-character-of-tag-name parse error. Emit a U+003C
372
464
  // LESS-THAN SIGN character token. Reconsume in the data state.
373
- } else {
465
+ reportError(
466
+ "invalid-first-character-of-tag-name",
467
+ pos,
468
+ pos + 1,
469
+ "warning"
470
+ );
374
471
  state = STATE_DATA;
375
472
  // Reconsume
376
473
  }
@@ -390,6 +487,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
390
487
  } else if (cc === CC_GREATER_THAN) {
391
488
  // U+003E GREATER-THAN SIGN (>)
392
489
  // This is a missing-end-tag-name parse error. Switch to the data state.
490
+ reportError("missing-end-tag-name", pos, pos + 1, "warning");
393
491
  state = STATE_DATA;
394
492
  pos++;
395
493
  } else {
@@ -397,10 +495,16 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
397
495
  // This is an invalid-first-character-of-tag-name parse error. Create a
398
496
  // comment token whose data is the empty string. Reconsume in the bogus
399
497
  // comment state.
498
+ reportError(
499
+ "invalid-first-character-of-tag-name",
500
+ pos,
501
+ pos + 1,
502
+ "warning"
503
+ );
400
504
  flushText(tagStart);
401
505
  commentStart = tagStart;
402
506
  state = STATE_BOGUS_COMMENT;
403
- pos++;
507
+ // Reconsume — let bogus-comment consume this char itself.
404
508
  }
405
509
  break;
406
510
 
@@ -461,6 +565,15 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
461
565
  state = STATE_AFTER_ATTRIBUTE_NAME;
462
566
  // Reconsume
463
567
  } else if (cc === CC_EQUALS) {
568
+ // U+003D EQUALS SIGN (=)
569
+ // This is an unexpected-equals-sign-before-attribute-name parse
570
+ // error. Start a new attribute. Switch to the attribute name state.
571
+ reportError(
572
+ "unexpected-equals-sign-before-attribute-name",
573
+ pos,
574
+ pos + 1,
575
+ "warning"
576
+ );
464
577
  attrNameStart = pos;
465
578
  state = STATE_ATTRIBUTE_NAME;
466
579
  pos++;
@@ -570,7 +683,13 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
570
683
  pos++;
571
684
  } else if (cc === CC_GREATER_THAN) {
572
685
  // U+003E GREATER-THAN SIGN (>)
573
- // Switch to the data state. Emit the current tag token.
686
+ // This is a missing-attribute-value parse error. Switch to the data
687
+ // state. Emit the current tag token. The attribute is reported with
688
+ // an empty value range pointing at the `>` so the open-tag offset range
689
+ // still includes the `>`.
690
+ reportError("missing-attribute-value", pos, pos + 1, "warning");
691
+ attrValueStart = pos;
692
+ attrQuoteType = QUOTE_NONE;
574
693
  pos = emitAttribute(pos);
575
694
  if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
576
695
  state = STATE_DATA;
@@ -699,6 +818,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
699
818
  // Anything else
700
819
  // This is a missing-whitespace-between-attributes parse error. Reconsume in
701
820
  // the before attribute name state.
821
+ reportError(
822
+ "missing-whitespace-between-attributes",
823
+ pos,
824
+ pos + 1,
825
+ "warning"
826
+ );
702
827
  state = STATE_BEFORE_ATTRIBUTE_NAME;
703
828
  // Reconsume
704
829
  }
@@ -722,6 +847,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
722
847
  // Anything else
723
848
  // This is an unexpected-solidus-in-tag parse error. Reconsume in the before
724
849
  // attribute name state.
850
+ reportError("unexpected-solidus-in-tag", pos, pos + 1, "warning");
725
851
  state = STATE_BEFORE_ATTRIBUTE_NAME;
726
852
  // Reconsume
727
853
  }
@@ -774,6 +900,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
774
900
  // This is an incorrectly-opened-comment parse error. Create a comment token
775
901
  // whose data is the empty string. Switch to the bogus comment state (don't
776
902
  // consume anything in the current state).
903
+ reportError("incorrectly-opened-comment", tagStart, pos, "warning");
777
904
  commentStart = tagStart;
778
905
  state = STATE_BOGUS_COMMENT;
779
906
  // Reconsume
@@ -792,6 +919,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
792
919
  // U+003E GREATER-THAN SIGN (>)
793
920
  // This is an abrupt-closing-of-empty-comment parse error. Switch to the
794
921
  // data state. Emit the current comment token.
922
+ reportError(
923
+ "abrupt-closing-of-empty-comment",
924
+ pos,
925
+ pos + 1,
926
+ "warning"
927
+ );
795
928
  let nextPos = pos + 1;
796
929
  if (callbacks.comment !== undefined) {
797
930
  nextPos = callbacks.comment(input, commentStart, pos + 1);
@@ -803,7 +936,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
803
936
  // Anything else
804
937
  // Reconsume in the comment state.
805
938
  state = STATE_COMMENT;
806
- pos++;
939
+ // Reconsume
807
940
  }
808
941
  break;
809
942
 
@@ -819,6 +952,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
819
952
  // U+003E GREATER-THAN SIGN (>)
820
953
  // This is an abrupt-closing-of-empty-comment parse error. Switch to the
821
954
  // data state. Emit the current comment token.
955
+ reportError(
956
+ "abrupt-closing-of-empty-comment",
957
+ pos,
958
+ pos + 1,
959
+ "warning"
960
+ );
822
961
  let nextPos = pos + 1;
823
962
  if (callbacks.comment !== undefined) {
824
963
  nextPos = callbacks.comment(input, commentStart, pos + 1);
@@ -831,7 +970,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
831
970
  // Append a U+002D HYPHEN-MINUS character (-) to the comment token's data.
832
971
  // Reconsume in the comment state.
833
972
  state = STATE_COMMENT;
834
- pos++;
973
+ // Reconsume
835
974
  }
836
975
  break;
837
976
 
@@ -886,7 +1025,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
886
1025
  pos = nextPos;
887
1026
  } else if (cc === CC_EXCLAMATION_MARK) {
888
1027
  // U+0021 EXCLAMATION MARK (!)
889
- // Switch to the markup declaration open state.
1028
+ // Switch to the comment end bang state.
890
1029
  state = STATE_COMMENT_END_BANG;
891
1030
  pos++;
892
1031
  } else if (cc === CC_HYPHEN_MINUS) {
@@ -914,6 +1053,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
914
1053
  // U+003E GREATER-THAN SIGN (>)
915
1054
  // This is an incorrectly-closed-comment parse error. Switch to the data
916
1055
  // state. Emit the current comment token.
1056
+ reportError("incorrectly-closed-comment", pos, pos + 1, "warning");
917
1057
  let nextPos = pos + 1;
918
1058
  if (callbacks.comment !== undefined) {
919
1059
  nextPos = callbacks.comment(input, commentStart, pos + 1);
@@ -1010,6 +1150,9 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1010
1150
  // Reconsume in the comment end state.
1011
1151
  // Anything else
1012
1152
  // This is a nested-comment parse error. Reconsume in the comment end state.
1153
+ if (cc !== CC_GREATER_THAN) {
1154
+ reportError("nested-comment", pos, pos + 1, "warning");
1155
+ }
1013
1156
  state = STATE_COMMENT_END;
1014
1157
  // Reconsume
1015
1158
  break;
@@ -1033,6 +1176,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1033
1176
  // Anything else
1034
1177
  // This is a missing-whitespace-before-doctype-name parse error. Reconsume
1035
1178
  // in the before DOCTYPE name state.
1179
+ reportError(
1180
+ "missing-whitespace-before-doctype-name",
1181
+ pos,
1182
+ pos + 1,
1183
+ "warning"
1184
+ );
1036
1185
  state = STATE_BEFORE_DOCTYPE_NAME;
1037
1186
  }
1038
1187
  break;
@@ -1059,6 +1208,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1059
1208
  // This is a missing-doctype-name parse error. Create a new DOCTYPE token.
1060
1209
  // Set its force-quirks flag to on. Switch to the data state. Emit the
1061
1210
  // current token.
1211
+ reportError("missing-doctype-name", pos, pos + 1, "warning");
1062
1212
  let nextPos = pos + 1;
1063
1213
  if (callbacks.doctype !== undefined) {
1064
1214
  nextPos = callbacks.doctype(input, commentStart, pos + 1);
@@ -1164,6 +1314,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1164
1314
  // This is an invalid-character-sequence-after-doctype-name parse error. Set
1165
1315
  // the current DOCTYPE token's force-quirks flag to on. Reconsume in the
1166
1316
  // bogus DOCTYPE state.
1317
+ reportError(
1318
+ "invalid-character-sequence-after-doctype-name",
1319
+ pos,
1320
+ pos + 1,
1321
+ "warning"
1322
+ );
1167
1323
  state = STATE_BOGUS_DOCTYPE;
1168
1324
  }
1169
1325
  break;
@@ -1185,6 +1341,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1185
1341
  // Set the current DOCTYPE token's public identifier to the empty string
1186
1342
  // (not missing), then switch to the DOCTYPE public identifier
1187
1343
  // (double-quoted) state.
1344
+ reportError(
1345
+ "missing-whitespace-after-doctype-public-keyword",
1346
+ pos,
1347
+ pos + 1,
1348
+ "warning"
1349
+ );
1188
1350
  state = STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
1189
1351
  pos++;
1190
1352
  } else if (cc === CC_APOSTROPHE) {
@@ -1193,6 +1355,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1193
1355
  // Set the current DOCTYPE token's public identifier to the empty string
1194
1356
  // (not missing), then switch to the DOCTYPE public identifier
1195
1357
  // (single-quoted) state.
1358
+ reportError(
1359
+ "missing-whitespace-after-doctype-public-keyword",
1360
+ pos,
1361
+ pos + 1,
1362
+ "warning"
1363
+ );
1196
1364
  state = STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
1197
1365
  pos++;
1198
1366
  } else if (cc === CC_GREATER_THAN) {
@@ -1200,6 +1368,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1200
1368
  // This is a missing-doctype-public-identifier parse error. Set the current
1201
1369
  // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1202
1370
  // the current DOCTYPE token.
1371
+ reportError(
1372
+ "missing-doctype-public-identifier",
1373
+ pos,
1374
+ pos + 1,
1375
+ "warning"
1376
+ );
1203
1377
  let nextPos = pos + 1;
1204
1378
  if (callbacks.doctype !== undefined) {
1205
1379
  nextPos = callbacks.doctype(input, commentStart, pos + 1);
@@ -1212,6 +1386,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1212
1386
  // This is a missing-quote-before-doctype-public-identifier parse error. Set
1213
1387
  // the current DOCTYPE token's force-quirks flag to on. Reconsume in the
1214
1388
  // bogus DOCTYPE state.
1389
+ reportError(
1390
+ "missing-quote-before-doctype-public-identifier",
1391
+ pos,
1392
+ pos + 1,
1393
+ "warning"
1394
+ );
1215
1395
  state = STATE_BOGUS_DOCTYPE;
1216
1396
  }
1217
1397
  break;
@@ -1245,6 +1425,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1245
1425
  // This is a missing-doctype-public-identifier parse error. Set the current
1246
1426
  // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1247
1427
  // the current DOCTYPE token.
1428
+ reportError(
1429
+ "missing-doctype-public-identifier",
1430
+ pos,
1431
+ pos + 1,
1432
+ "warning"
1433
+ );
1248
1434
  let nextPos = pos + 1;
1249
1435
  if (callbacks.doctype !== undefined) {
1250
1436
  nextPos = callbacks.doctype(input, commentStart, pos + 1);
@@ -1257,6 +1443,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1257
1443
  // This is a missing-quote-before-doctype-public-identifier parse error. Set
1258
1444
  // the current DOCTYPE token's force-quirks flag to on. Reconsume in the
1259
1445
  // bogus DOCTYPE state.
1446
+ reportError(
1447
+ "missing-quote-before-doctype-public-identifier",
1448
+ pos,
1449
+ pos + 1,
1450
+ "warning"
1451
+ );
1260
1452
  state = STATE_BOGUS_DOCTYPE;
1261
1453
  }
1262
1454
  break;
@@ -1280,6 +1472,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1280
1472
  // This is an abrupt-doctype-public-identifier parse error. Set the current
1281
1473
  // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1282
1474
  // the current DOCTYPE token.
1475
+ reportError(
1476
+ "abrupt-doctype-public-identifier",
1477
+ pos,
1478
+ pos + 1,
1479
+ "warning"
1480
+ );
1283
1481
  let nextPos = pos + 1;
1284
1482
  if (callbacks.doctype !== undefined) {
1285
1483
  nextPos = callbacks.doctype(input, commentStart, pos + 1);
@@ -1314,6 +1512,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1314
1512
  // This is an abrupt-doctype-public-identifier parse error. Set the current
1315
1513
  // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1316
1514
  // the current DOCTYPE token.
1515
+ reportError(
1516
+ "abrupt-doctype-public-identifier",
1517
+ pos,
1518
+ pos + 1,
1519
+ "warning"
1520
+ );
1317
1521
  let nextPos = pos + 1;
1318
1522
  if (callbacks.doctype !== undefined) {
1319
1523
  nextPos = callbacks.doctype(input, commentStart, pos + 1);
@@ -1356,6 +1560,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1356
1560
  // parse error. Set the current DOCTYPE token's system
1357
1561
  // identifier to the empty string (not missing), then switch
1358
1562
  // to the DOCTYPE system identifier (double-quoted) state.
1563
+ reportError(
1564
+ "missing-whitespace-between-doctype-public-and-system-identifiers",
1565
+ pos,
1566
+ pos + 1,
1567
+ "warning"
1568
+ );
1359
1569
  state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
1360
1570
  pos++;
1361
1571
  } else if (cc === CC_APOSTROPHE) {
@@ -1364,6 +1574,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1364
1574
  // parse error. Set the current DOCTYPE token's system
1365
1575
  // identifier to the empty string (not missing), then switch
1366
1576
  // to the DOCTYPE system identifier (single-quoted) state.
1577
+ reportError(
1578
+ "missing-whitespace-between-doctype-public-and-system-identifiers",
1579
+ pos,
1580
+ pos + 1,
1581
+ "warning"
1582
+ );
1367
1583
  state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
1368
1584
  pos++;
1369
1585
  } else {
@@ -1371,6 +1587,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1371
1587
  // This is a missing-quote-before-doctype-system-identifier parse error. Set
1372
1588
  // the current DOCTYPE token's force-quirks flag to on. Reconsume in the
1373
1589
  // bogus DOCTYPE state.
1590
+ reportError(
1591
+ "missing-quote-before-doctype-system-identifier",
1592
+ pos,
1593
+ pos + 1,
1594
+ "warning"
1595
+ );
1374
1596
  state = STATE_BOGUS_DOCTYPE;
1375
1597
  }
1376
1598
  break;
@@ -1414,6 +1636,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1414
1636
  // This is a missing-quote-before-doctype-system-identifier parse error. Set
1415
1637
  // the current DOCTYPE token's force-quirks flag to on. Reconsume in the
1416
1638
  // bogus DOCTYPE state.
1639
+ reportError(
1640
+ "missing-quote-before-doctype-system-identifier",
1641
+ pos,
1642
+ pos + 1,
1643
+ "warning"
1644
+ );
1417
1645
  state = STATE_BOGUS_DOCTYPE;
1418
1646
  }
1419
1647
  break;
@@ -1435,6 +1663,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1435
1663
  // Set the current DOCTYPE token's system identifier to the empty string
1436
1664
  // (not missing), then switch to the DOCTYPE system identifier
1437
1665
  // (double-quoted) state.
1666
+ reportError(
1667
+ "missing-whitespace-after-doctype-system-keyword",
1668
+ pos,
1669
+ pos + 1,
1670
+ "warning"
1671
+ );
1438
1672
  state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
1439
1673
  pos++;
1440
1674
  } else if (cc === CC_APOSTROPHE) {
@@ -1443,6 +1677,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1443
1677
  // Set the current DOCTYPE token's system identifier to the empty string
1444
1678
  // (not missing), then switch to the DOCTYPE system identifier
1445
1679
  // (single-quoted) state.
1680
+ reportError(
1681
+ "missing-whitespace-after-doctype-system-keyword",
1682
+ pos,
1683
+ pos + 1,
1684
+ "warning"
1685
+ );
1446
1686
  state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
1447
1687
  pos++;
1448
1688
  } else if (cc === CC_GREATER_THAN) {
@@ -1450,6 +1690,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1450
1690
  // This is a missing-doctype-system-identifier parse error. Set the current
1451
1691
  // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1452
1692
  // the current DOCTYPE token.
1693
+ reportError(
1694
+ "missing-doctype-system-identifier",
1695
+ pos,
1696
+ pos + 1,
1697
+ "warning"
1698
+ );
1453
1699
  let nextPos = pos + 1;
1454
1700
  if (callbacks.doctype !== undefined) {
1455
1701
  nextPos = callbacks.doctype(input, commentStart, pos + 1);
@@ -1462,6 +1708,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1462
1708
  // This is a missing-quote-before-doctype-system-identifier parse error. Set
1463
1709
  // the current DOCTYPE token's force-quirks flag to on. Reconsume in the
1464
1710
  // bogus DOCTYPE state.
1711
+ reportError(
1712
+ "missing-quote-before-doctype-system-identifier",
1713
+ pos,
1714
+ pos + 1,
1715
+ "warning"
1716
+ );
1465
1717
  state = STATE_BOGUS_DOCTYPE;
1466
1718
  }
1467
1719
  break;
@@ -1495,6 +1747,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1495
1747
  // This is a missing-doctype-system-identifier parse error. Set the current
1496
1748
  // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1497
1749
  // the current DOCTYPE token.
1750
+ reportError(
1751
+ "missing-doctype-system-identifier",
1752
+ pos,
1753
+ pos + 1,
1754
+ "warning"
1755
+ );
1498
1756
  let nextPos = pos + 1;
1499
1757
  if (callbacks.doctype !== undefined) {
1500
1758
  nextPos = callbacks.doctype(input, commentStart, pos + 1);
@@ -1507,6 +1765,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1507
1765
  // This is a missing-quote-before-doctype-system-identifier parse error. Set
1508
1766
  // the current DOCTYPE token's force-quirks flag to on. Reconsume in the
1509
1767
  // bogus DOCTYPE state.
1768
+ reportError(
1769
+ "missing-quote-before-doctype-system-identifier",
1770
+ pos,
1771
+ pos + 1,
1772
+ "warning"
1773
+ );
1510
1774
  state = STATE_BOGUS_DOCTYPE;
1511
1775
  }
1512
1776
  break;
@@ -1530,6 +1794,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1530
1794
  // This is an abrupt-doctype-system-identifier parse error. Set the current
1531
1795
  // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1532
1796
  // the current DOCTYPE token.
1797
+ reportError(
1798
+ "abrupt-doctype-system-identifier",
1799
+ pos,
1800
+ pos + 1,
1801
+ "warning"
1802
+ );
1533
1803
  let nextPos = pos + 1;
1534
1804
  if (callbacks.doctype !== undefined) {
1535
1805
  nextPos = callbacks.doctype(input, commentStart, pos + 1);
@@ -1564,6 +1834,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1564
1834
  // This is an abrupt-doctype-system-identifier parse error. Set the current
1565
1835
  // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1566
1836
  // the current DOCTYPE token.
1837
+ reportError(
1838
+ "abrupt-doctype-system-identifier",
1839
+ pos,
1840
+ pos + 1,
1841
+ "warning"
1842
+ );
1567
1843
  let nextPos = pos + 1;
1568
1844
  if (callbacks.doctype !== undefined) {
1569
1845
  nextPos = callbacks.doctype(input, commentStart, pos + 1);
@@ -1604,6 +1880,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1604
1880
  // This is an unexpected-character-after-doctype-system-identifier parse
1605
1881
  // error. Reconsume in the bogus DOCTYPE state. (This does not set the
1606
1882
  // current DOCTYPE token's force-quirks flag to on.)
1883
+ reportError(
1884
+ "unexpected-character-after-doctype-system-identifier",
1885
+ pos,
1886
+ pos + 1,
1887
+ "warning"
1888
+ );
1607
1889
  state = STATE_BOGUS_DOCTYPE;
1608
1890
  }
1609
1891
  break;
@@ -1710,10 +1992,10 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1710
1992
  case STATE_RCDATA_LESS_THAN_SIGN:
1711
1993
  // Consume the next input character:
1712
1994
  // U+002F SOLIDUS (/)
1713
- // Set the temporary buffer to the empty string. Switch to the RCDATA end
1714
- // tag open state.
1995
+ // Switch to the RCDATA end tag open state. (Spec sets a
1996
+ // temporary buffer here; we track the would-be content via
1997
+ // offset ranges instead.)
1715
1998
  if (cc === CC_SOLIDUS) {
1716
- tempBuffer = "";
1717
1999
  state = STATE_RCDATA_END_TAG_OPEN;
1718
2000
  pos++;
1719
2001
  } else {
@@ -1760,6 +2042,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1760
2042
  input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
1761
2043
  lastOpenTagName
1762
2044
  ) {
2045
+ flushText(tagStart);
1763
2046
  state = STATE_BEFORE_ATTRIBUTE_NAME;
1764
2047
  pos++;
1765
2048
  } else {
@@ -1776,6 +2059,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1776
2059
  input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
1777
2060
  lastOpenTagName
1778
2061
  ) {
2062
+ flushText(tagStart);
1779
2063
  state = STATE_SELF_CLOSING_START_TAG;
1780
2064
  pos++;
1781
2065
  } else {
@@ -1834,10 +2118,9 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1834
2118
  case STATE_RAWTEXT_LESS_THAN_SIGN:
1835
2119
  // Consume the next input character:
1836
2120
  // U+002F SOLIDUS (/)
1837
- // Set the temporary buffer to the empty string. Switch to the RAWTEXT end
1838
- // tag open state.
2121
+ // Switch to the RAWTEXT end tag open state. (Spec sets a
2122
+ // temporary buffer here; we track via offset ranges instead.)
1839
2123
  if (cc === CC_SOLIDUS) {
1840
- tempBuffer = "";
1841
2124
  state = STATE_RAWTEXT_END_TAG_OPEN;
1842
2125
  pos++;
1843
2126
  } else {
@@ -1884,6 +2167,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1884
2167
  input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
1885
2168
  lastOpenTagName
1886
2169
  ) {
2170
+ flushText(tagStart);
1887
2171
  state = STATE_BEFORE_ATTRIBUTE_NAME;
1888
2172
  pos++;
1889
2173
  } else {
@@ -1899,6 +2183,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1899
2183
  input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
1900
2184
  lastOpenTagName
1901
2185
  ) {
2186
+ flushText(tagStart);
1902
2187
  state = STATE_SELF_CLOSING_START_TAG;
1903
2188
  pos++;
1904
2189
  } else {
@@ -1955,10 +2240,9 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
1955
2240
  case STATE_SCRIPT_DATA_LESS_THAN_SIGN:
1956
2241
  // Consume the next input character:
1957
2242
  // U+002F SOLIDUS (/)
1958
- // Set the temporary buffer to the empty string. Switch to the script data
1959
- // end tag open state.
2243
+ // Switch to the script data end tag open state. (Spec sets a
2244
+ // temporary buffer here; we track via offset ranges instead.)
1960
2245
  if (cc === CC_SOLIDUS) {
1961
- tempBuffer = "";
1962
2246
  state = STATE_SCRIPT_DATA_END_TAG_OPEN;
1963
2247
  pos++;
1964
2248
  } else if (cc === CC_EXCLAMATION_MARK) {
@@ -2011,6 +2295,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
2011
2295
  input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
2012
2296
  lastOpenTagName
2013
2297
  ) {
2298
+ flushText(tagStart);
2014
2299
  state = STATE_BEFORE_ATTRIBUTE_NAME;
2015
2300
  pos++;
2016
2301
  } else {
@@ -2026,6 +2311,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
2026
2311
  input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
2027
2312
  lastOpenTagName
2028
2313
  ) {
2314
+ flushText(tagStart);
2029
2315
  state = STATE_SELF_CLOSING_START_TAG;
2030
2316
  pos++;
2031
2317
  } else {
@@ -2107,6 +2393,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
2107
2393
  } else if (cc === CC_LESS_THAN) {
2108
2394
  // U+003C LESS-THAN SIGN (<)
2109
2395
  // Switch to the script data escaped less-than sign state.
2396
+ tagStart = pos;
2110
2397
  state = STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
2111
2398
  pos++;
2112
2399
  } else {
@@ -2128,6 +2415,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
2128
2415
  } else if (cc === CC_LESS_THAN) {
2129
2416
  // U+003C LESS-THAN SIGN (<)
2130
2417
  // Switch to the script data escaped less-than sign state.
2418
+ tagStart = pos;
2131
2419
  state = STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
2132
2420
  pos++;
2133
2421
  } else {
@@ -2149,6 +2437,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
2149
2437
  } else if (cc === CC_LESS_THAN) {
2150
2438
  // U+003C LESS-THAN SIGN (<)
2151
2439
  // Switch to the script data escaped less-than sign state.
2440
+ tagStart = pos;
2152
2441
  state = STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
2153
2442
  pos++;
2154
2443
  } else if (cc === CC_GREATER_THAN) {
@@ -2170,10 +2459,9 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
2170
2459
  case STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
2171
2460
  // Consume the next input character:
2172
2461
  // U+002F SOLIDUS (/)
2173
- // Set the temporary buffer to the empty string. Switch to the script data
2174
- // escaped end tag open state.
2462
+ // Switch to the script data escaped end tag open state.
2463
+ // (Spec sets a temporary buffer; we track via offset ranges.)
2175
2464
  if (cc === CC_SOLIDUS) {
2176
- tempBuffer = "";
2177
2465
  state = STATE_SCRIPT_DATA_ESCAPED_END_TAG_OPEN;
2178
2466
  pos++;
2179
2467
  } else if (isAsciiAlpha(cc)) {
@@ -2181,7 +2469,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
2181
2469
  // Set the temporary buffer to the empty string. Emit a U+003C LESS-THAN
2182
2470
  // SIGN character token. Reconsume in the script data double escape start
2183
2471
  // state.
2184
- tempBuffer = "";
2472
+ scriptMatch = 0;
2185
2473
  state = STATE_SCRIPT_DATA_DOUBLE_ESCAPE_START;
2186
2474
  // Reconsume
2187
2475
  } else {
@@ -2228,6 +2516,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
2228
2516
  input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
2229
2517
  lastOpenTagName
2230
2518
  ) {
2519
+ flushText(tagStart);
2231
2520
  state = STATE_BEFORE_ATTRIBUTE_NAME;
2232
2521
  pos++;
2233
2522
  } else {
@@ -2243,6 +2532,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
2243
2532
  input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
2244
2533
  lastOpenTagName
2245
2534
  ) {
2535
+ flushText(tagStart);
2246
2536
  state = STATE_SELF_CLOSING_START_TAG;
2247
2537
  pos++;
2248
2538
  } else {
@@ -2292,22 +2582,21 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
2292
2582
  // state. Emit the current input character as a character token.
2293
2583
  if (isSpace(cc) || cc === CC_SOLIDUS || cc === CC_GREATER_THAN) {
2294
2584
  state =
2295
- tempBuffer === "script"
2585
+ scriptMatch === 6
2296
2586
  ? STATE_SCRIPT_DATA_DOUBLE_ESCAPED
2297
2587
  : STATE_SCRIPT_DATA_ESCAPED;
2298
2588
  pos++;
2299
- } else if (isAsciiUpperAlpha(cc)) {
2300
- // ASCII upper alpha
2301
- // Append the lowercase version of the current input character (add 0x0020
2302
- // to the character's code point) to the temporary buffer. Emit the current
2303
- // input character as a character token.
2304
- tempBuffer += String.fromCharCode(cc + 0x20);
2305
- pos++;
2306
- } else if (isAsciiLowerAlpha(cc)) {
2307
- // ASCII lower alpha
2308
- // Append the current input character to the temporary buffer. Emit the
2309
- // current input character as a character token.
2310
- tempBuffer += String.fromCharCode(cc);
2589
+ } else if (isAsciiUpperAlpha(cc) || isAsciiLowerAlpha(cc)) {
2590
+ // ASCII alpha — advance the `"script"` match counter if the
2591
+ // lowercase form matches the next expected char, otherwise
2592
+ // snap to the sentinel so further chars can't revive a
2593
+ // match. No buffer allocation.
2594
+ const lower = isAsciiUpperAlpha(cc) ? cc + 0x20 : cc;
2595
+ if (scriptMatch < 6 && lower === "script".charCodeAt(scriptMatch)) {
2596
+ scriptMatch++;
2597
+ } else {
2598
+ scriptMatch = 7;
2599
+ }
2311
2600
  pos++;
2312
2601
  } else {
2313
2602
  // Anything else
@@ -2398,7 +2687,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
2398
2687
  // Set the temporary buffer to the empty string. Switch to the script data
2399
2688
  // double escape end state. Emit a U+002F SOLIDUS character token.
2400
2689
  if (cc === CC_SOLIDUS) {
2401
- tempBuffer = "";
2690
+ scriptMatch = 0;
2402
2691
  state = STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END;
2403
2692
  pos++;
2404
2693
  } else {
@@ -2423,25 +2712,20 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
2423
2712
  // state. Emit the current input character as a character token.
2424
2713
  if (isSpace(cc) || cc === CC_SOLIDUS || cc === CC_GREATER_THAN) {
2425
2714
  state =
2426
- tempBuffer === "script"
2715
+ scriptMatch === 6
2427
2716
  ? STATE_SCRIPT_DATA_ESCAPED
2428
2717
  : STATE_SCRIPT_DATA_DOUBLE_ESCAPED;
2429
2718
  pos++;
2430
- } else if (isAsciiUpperAlpha(cc)) {
2431
- // ASCII upper alpha
2432
- // Append the lowercase version of the current input character (add 0x0020
2433
- // to the character's code point) to the temporary buffer. Emit the current
2434
- // input character as a character token.
2435
- if (tempBuffer.length < 6) {
2436
- tempBuffer += String.fromCharCode(cc + 0x20);
2437
- }
2438
- pos++;
2439
- } else if (isAsciiLowerAlpha(cc)) {
2440
- // ASCII lower alpha
2441
- // Append the current input character to the temporary buffer. Emit the
2442
- // current input character as a character token.
2443
- if (tempBuffer.length < 6) {
2444
- tempBuffer += String.fromCharCode(cc);
2719
+ } else if (isAsciiUpperAlpha(cc) || isAsciiLowerAlpha(cc)) {
2720
+ // ASCII alpha — advance the `"script"` match counter if the
2721
+ // lowercase form matches the next expected char, otherwise
2722
+ // snap to the sentinel so further chars can't revive a
2723
+ // match. No buffer allocation.
2724
+ const lower = isAsciiUpperAlpha(cc) ? cc + 0x20 : cc;
2725
+ if (scriptMatch < 6 && lower === "script".charCodeAt(scriptMatch)) {
2726
+ scriptMatch++;
2727
+ } else {
2728
+ scriptMatch = 7;
2445
2729
  }
2446
2730
  pos++;
2447
2731
  } else {
@@ -2486,36 +2770,42 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
2486
2770
  break;
2487
2771
 
2488
2772
  // https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
2489
- case STATE_NAMED_CHARACTER_REFERENCE:
2490
- // Consume the maximum number of characters possible, where the
2773
+ case STATE_NAMED_CHARACTER_REFERENCE: {
2774
+ // Consume the maximum number of characters possible where the
2491
2775
  // consumed characters are one of the identifiers in the first
2492
- // column of the named character references table. Append each
2493
- // character to the temporary buffer when it's consumed.
2776
+ // column of the named character references table.
2494
2777
  //
2495
- // TODO(named-entities): The WHATWG spec requires matching against
2496
- // the full named character references table (~2,000 entries).
2497
- // For now, we scan past the entity name without table lookup to keep
2498
- // the core tokenizer lightweight. A follow-up PR will provide a build-time
2499
- // script to generate a compact Trie/Map or dynamic import for the full
2500
- // table, along with the `is_consumed_as_part_of_an_attribute` check.
2501
- // The semantic fallback for consumers (like HtmlParser) is to use the
2502
- // minimal `decodeHtmlEntities` utility exported below, which guarantees
2503
- // correctness for URLs (`&amp;`) and common characters without bundle bloat.
2504
- namedEntityConsumed = 0;
2778
+ // We measure the longest run of ASCII alphanumeric characters
2779
+ // (capped at MAX_ENTITY_NAME_LEN - 1 since the optional `;` is
2780
+ // handled separately), then walk that run from longest to
2781
+ // shortest looking for the first prefix that exists in the
2782
+ // entity table (with a trailing `;` if present, otherwise the
2783
+ // legacy bare form).
2784
+ let runLen = 0;
2505
2785
  while (
2506
- pos + namedEntityConsumed < len &&
2507
- isAsciiAlphanumeric(input.charCodeAt(pos + namedEntityConsumed))
2786
+ pos + runLen < len &&
2787
+ isAsciiAlphanumeric(input.charCodeAt(pos + runLen)) &&
2788
+ runLen < MAX_ENTITY_NAME_LEN - 1
2508
2789
  ) {
2509
- namedEntityConsumed++;
2510
- // Safety cap — the longest entity is ~33 chars
2511
- if (namedEntityConsumed > 33) break;
2790
+ runLen++;
2512
2791
  }
2513
- // Check for trailing semicolon
2514
- if (
2515
- pos + namedEntityConsumed < len &&
2516
- input.charCodeAt(pos + namedEntityConsumed) === CC_SEMICOLON
2517
- ) {
2518
- namedEntityConsumed++;
2792
+ const hasSemicolon =
2793
+ pos + runLen < len && input.charCodeAt(pos + runLen) === CC_SEMICOLON;
2794
+ namedEntityConsumed = 0;
2795
+ for (let n = runLen; n > 0; n--) {
2796
+ // Try with trailing `;` first if one is present after the run.
2797
+ if (n === runLen && hasSemicolon) {
2798
+ const withSemi = `${input.slice(pos, pos + n)};`;
2799
+ if (HTML_ENTITIES[withSemi] !== undefined) {
2800
+ namedEntityConsumed = n + 1;
2801
+ break;
2802
+ }
2803
+ }
2804
+ const bare = input.slice(pos, pos + n);
2805
+ if (HTML_ENTITIES[bare] !== undefined) {
2806
+ namedEntityConsumed = n;
2807
+ break;
2808
+ }
2519
2809
  }
2520
2810
  if (namedEntityConsumed > 0) {
2521
2811
  pos += namedEntityConsumed;
@@ -2526,6 +2816,7 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
2526
2816
  state = STATE_AMBIGUOUS_AMPERSAND;
2527
2817
  }
2528
2818
  break;
2819
+ }
2529
2820
 
2530
2821
  // https://html.spec.whatwg.org/multipage/parsing.html#ambiguous-ampersand-state
2531
2822
  case STATE_AMBIGUOUS_AMPERSAND:
@@ -2533,14 +2824,20 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
2533
2824
  if (isAsciiAlphanumeric(cc)) {
2534
2825
  // ASCII alphanumeric
2535
2826
  // If the character reference was consumed as part of an
2536
- // attribute, then append the current input character to
2537
- // the current attribute's value. Otherwise, emit the
2538
- // current input character as a character token.
2827
+ // attribute, then append the current input character to the
2828
+ // current attribute's value. Otherwise, emit the current
2829
+ // input character as a character token.
2539
2830
  pos++;
2540
2831
  } else if (cc === CC_SEMICOLON) {
2541
2832
  // U+003B SEMICOLON (;)
2542
2833
  // This is an unknown-named-character-reference parse error.
2543
2834
  // Reconsume in the return state.
2835
+ reportError(
2836
+ "unknown-named-character-reference",
2837
+ pos,
2838
+ pos + 1,
2839
+ "warning"
2840
+ );
2544
2841
  state = returnState;
2545
2842
  // Reconsume
2546
2843
  } else {
@@ -2578,9 +2875,17 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
2578
2875
  // Anything else: absence-of-digits-in-numeric-character-reference parse
2579
2876
  // error. Flush code points consumed as a character reference. Reconsume
2580
2877
  // in the return state.
2581
- state = isAsciiHexDigit(cc)
2582
- ? STATE_HEXADECIMAL_CHARACTER_REFERENCE
2583
- : returnState;
2878
+ if (isAsciiHexDigit(cc)) {
2879
+ state = STATE_HEXADECIMAL_CHARACTER_REFERENCE;
2880
+ } else {
2881
+ reportError(
2882
+ "absence-of-digits-in-numeric-character-reference",
2883
+ pos,
2884
+ pos + 1,
2885
+ "warning"
2886
+ );
2887
+ state = returnState;
2888
+ }
2584
2889
  // Reconsume
2585
2890
  break;
2586
2891
 
@@ -2591,9 +2896,17 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
2591
2896
  // Anything else: absence-of-digits-in-numeric-character-reference parse
2592
2897
  // error. Flush code points consumed as a character reference. Reconsume
2593
2898
  // in the return state.
2594
- state = isAsciiDigit(cc)
2595
- ? STATE_DECIMAL_CHARACTER_REFERENCE
2596
- : returnState;
2899
+ if (isAsciiDigit(cc)) {
2900
+ state = STATE_DECIMAL_CHARACTER_REFERENCE;
2901
+ } else {
2902
+ reportError(
2903
+ "absence-of-digits-in-numeric-character-reference",
2904
+ pos,
2905
+ pos + 1,
2906
+ "warning"
2907
+ );
2908
+ state = returnState;
2909
+ }
2597
2910
  // Reconsume
2598
2911
  break;
2599
2912
 
@@ -2616,6 +2929,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
2616
2929
  // This is a missing-semicolon-after-character-reference
2617
2930
  // parse error. Reconsume in the numeric character reference
2618
2931
  // end state.
2932
+ reportError(
2933
+ "missing-semicolon-after-character-reference",
2934
+ pos,
2935
+ pos + 1,
2936
+ "warning"
2937
+ );
2619
2938
  state = STATE_NUMERIC_CHARACTER_REFERENCE_END;
2620
2939
  // Reconsume
2621
2940
  }
@@ -2641,6 +2960,12 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
2641
2960
  // This is a missing-semicolon-after-character-reference
2642
2961
  // parse error. Reconsume in the numeric character reference
2643
2962
  // end state.
2963
+ reportError(
2964
+ "missing-semicolon-after-character-reference",
2965
+ pos,
2966
+ pos + 1,
2967
+ "warning"
2968
+ );
2644
2969
  state = STATE_NUMERIC_CHARACTER_REFERENCE_END;
2645
2970
  // Reconsume
2646
2971
  }
@@ -2656,26 +2981,111 @@ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
2656
2981
  // Reconsume
2657
2982
  break;
2658
2983
 
2984
+ /* istanbul ignore next -- @preserve: defensive fallback, all states are explicit above */
2659
2985
  default:
2660
2986
  pos++;
2661
2987
  }
2662
2988
  }
2663
2989
 
2990
+ // Handle EOF in non-data states per the WHATWG spec.
2991
+ //
2992
+ // Each in-progress comment / doctype / cdata / tag emits its partial
2993
+ // token range plus a corresponding `eof-in-X` parse error. Severity is
2994
+ // `"error"` because the emitted token offset range is incomplete (missing
2995
+ // trailing `-->`, `>`, `]]>`, etc.). For data / `<` / `</` / `<!`-only
2996
+ // inputs we emit `eof-before-tag-name` and fall through to flush the
2997
+ // pending text span (which still contains the lone `<`).
2998
+ // If EOF caught us inside a character-reference state, flush whatever the
2999
+ // scanner had consumed and resume in the return state so any in-progress
3000
+ // tag/comment is handled correctly by the branches below.
3001
+ if (
3002
+ state >= STATE_CHARACTER_REFERENCE &&
3003
+ state <= STATE_NUMERIC_CHARACTER_REFERENCE_END
3004
+ ) {
3005
+ state = returnState;
3006
+ }
3007
+
2664
3008
  if (
2665
- (state >= STATE_MARKUP_DECLARATION_OPEN && state <= STATE_BOGUS_COMMENT) ||
3009
+ (state >= STATE_TAG_NAME && state <= STATE_SELF_CLOSING_START_TAG) ||
3010
+ state === STATE_RCDATA_END_TAG_NAME ||
3011
+ state === STATE_RAWTEXT_END_TAG_NAME ||
3012
+ state === STATE_SCRIPT_DATA_END_TAG_NAME ||
3013
+ state === STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME
3014
+ ) {
3015
+ // EOF mid-tag — emit the partial open/close tag at EOF so the
3016
+ // consumer still sees the tag. This is a deliberate deviation
3017
+ // from the spec's per-character emission model: rather than
3018
+ // dropping the in-progress tag, we emit its offset range up to EOF.
3019
+ reportError("eof-in-tag", len, len, "error");
3020
+ // If we hit EOF mid-attribute-name, the name runs to EOF. Set
3021
+ // attrNameEnd here so the emitted attribute range is valid.
3022
+ if (state === STATE_ATTRIBUTE_NAME && attrNameStart !== -1) {
3023
+ attrNameEnd = len;
3024
+ }
3025
+ if (attrNameStart !== -1) emitAttribute(len);
3026
+ // If we hit EOF before the tag-name end was recorded, the name runs
3027
+ // to EOF. `tagNameEnd` may carry over from a previously emitted tag,
3028
+ // so reset it whenever it's missing or stale (less than `tagNameStart`)
3029
+ // — covers `<div` open-tag EOFs as well as `<title>x</tit` and other
3030
+ // content-mode end-tag-name EOFs.
3031
+ if (tagNameStart !== -1 && tagNameEnd < tagNameStart) {
3032
+ tagNameEnd = len;
3033
+ }
3034
+ flushText(tagStart);
3035
+ pos =
3036
+ input.charCodeAt(tagStart + 1) === CC_SOLIDUS
3037
+ ? emitCloseTag(len)
3038
+ : emitOpenTag(len, false);
3039
+ } else if (
3040
+ (state >= STATE_COMMENT_START && state <= STATE_BOGUS_COMMENT) ||
2666
3041
  (state >= STATE_COMMENT_LESS_THAN_SIGN &&
2667
3042
  state <= STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH) ||
2668
- (state >= STATE_CDATA_SECTION && state <= STATE_CDATA_SECTION_END)
3043
+ state === STATE_MARKUP_DECLARATION_OPEN
2669
3044
  ) {
3045
+ // Bogus comments at EOF are normal per spec (no parse error).
3046
+ if (state !== STATE_BOGUS_COMMENT) {
3047
+ reportError("eof-in-comment", len, len, "error");
3048
+ }
3049
+ if (callbacks.comment !== undefined) {
3050
+ pos = callbacks.comment(input, commentStart, len);
3051
+ }
3052
+ } else if (state >= STATE_CDATA_SECTION && state <= STATE_CDATA_SECTION_END) {
3053
+ reportError("eof-in-cdata", len, len, "error");
2670
3054
  if (callbacks.comment !== undefined) {
2671
3055
  pos = callbacks.comment(input, commentStart, len);
2672
3056
  }
2673
3057
  } else if (state >= STATE_DOCTYPE && state <= STATE_BOGUS_DOCTYPE) {
3058
+ reportError("eof-in-doctype", len, len, "error");
2674
3059
  if (callbacks.doctype !== undefined) {
2675
3060
  pos = callbacks.doctype(input, commentStart, len);
2676
3061
  }
2677
- } else if (textStart < len && callbacks.text !== undefined) {
2678
- callbacks.text(input, textStart, len);
3062
+ } else {
3063
+ if (
3064
+ state === STATE_SCRIPT_DATA_ESCAPED ||
3065
+ state === STATE_SCRIPT_DATA_ESCAPED_DASH ||
3066
+ state === STATE_SCRIPT_DATA_ESCAPED_DASH_DASH ||
3067
+ state === STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN ||
3068
+ state === STATE_SCRIPT_DATA_DOUBLE_ESCAPED ||
3069
+ state === STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH ||
3070
+ state === STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH ||
3071
+ state === STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN ||
3072
+ state === STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END
3073
+ ) {
3074
+ // Inside `<script><!-- … ` at EOF — spec calls this an
3075
+ // eof-in-script-html-comment-like-text parse error. The
3076
+ // less-than-sign and double-escape-end states reconsume back
3077
+ // into the (double-)escaped state on EOF per spec, which then
3078
+ // hits this same error.
3079
+ reportError("eof-in-script-html-comment-like-text", len, len, "error");
3080
+ } else if (state === STATE_TAG_OPEN || state === STATE_END_TAG_OPEN) {
3081
+ // `<` or `</` with nothing after; spec calls this
3082
+ // eof-before-tag-name. The lone `<` / `</` is preserved in the
3083
+ // pending text span which is flushed below.
3084
+ reportError("eof-before-tag-name", len, len, "warning");
3085
+ }
3086
+ if (textStart < len && callbacks.text !== undefined) {
3087
+ callbacks.text(input, textStart, len);
3088
+ }
2679
3089
  }
2680
3090
 
2681
3091
  return pos;
@@ -2685,49 +3095,155 @@ walkHtmlTokens.QUOTE_NONE = QUOTE_NONE;
2685
3095
  walkHtmlTokens.QUOTE_SINGLE = QUOTE_SINGLE;
2686
3096
  walkHtmlTokens.QUOTE_DOUBLE = QUOTE_DOUBLE;
2687
3097
 
2688
- const MINIMAL_ENTITIES = {
2689
- amp: "&",
2690
- lt: "<",
2691
- gt: ">",
2692
- quot: '"',
2693
- apos: "'",
2694
- nbsp: "\u00A0"
3098
+ // WHATWG numeric-character-reference-end Windows-1252 remap table for the
3099
+ // 0x80-0x9F range. Per spec these C1 control code points decode to the
3100
+ // corresponding Windows-1252 glyph (with a parse error) rather than to the
3101
+ // raw C1 control character.
3102
+ const NUMERIC_C1_REMAP = {
3103
+ 0x80: "€",
3104
+ 0x82: "‚",
3105
+ 0x83: "ƒ",
3106
+ 0x84: "„",
3107
+ 0x85: "…",
3108
+ 0x86: "†",
3109
+ 0x87: "‡",
3110
+ 0x88: "ˆ",
3111
+ 0x89: "‰",
3112
+ 0x8a: "Š",
3113
+ 0x8b: "‹",
3114
+ 0x8c: "Œ",
3115
+ 0x8e: "Ž",
3116
+ 0x91: "‘",
3117
+ 0x92: "’",
3118
+ 0x93: "“",
3119
+ 0x94: "”",
3120
+ 0x95: "•",
3121
+ 0x96: "–",
3122
+ 0x97: "—",
3123
+ 0x98: "˜",
3124
+ 0x99: "™",
3125
+ 0x9a: "š",
3126
+ 0x9b: "›",
3127
+ 0x9c: "œ",
3128
+ 0x9e: "ž",
3129
+ 0x9f: "Ÿ"
2695
3130
  };
2696
3131
 
2697
3132
  /**
2698
- * Minimal entity decoder for safe string resolution without bundle bloat.
2699
- * Decodes the core URL-safe named entities and all numeric references.
2700
- * Leaves unknown entities as literal strings to prevent silent character drops.
3133
+ * @param {number} code numeric character reference code point
3134
+ * @returns {string} decoded character per WHATWG remap rules
3135
+ */
3136
+ const decodeNumericReference = (code) => {
3137
+ // Per WHATWG numeric-character-reference-end-state:
3138
+ // - 0x00, > 0x10FFFF, or surrogate (0xD800-0xDFFF) -> U+FFFD.
3139
+ // - 0x80-0x9F -> Windows-1252 remap (above).
3140
+ // - Anything else (including noncharacters and C0 controls) -> the
3141
+ // code point itself; we don't surface the spec's parse-error
3142
+ // classes here since decoding is happening after the scanner ran.
3143
+ if (code === 0 || code > 0x10ffff || (code >= 0xd800 && code <= 0xdfff)) {
3144
+ return "�";
3145
+ }
3146
+ if (code >= 0x80 && code <= 0x9f) {
3147
+ const remapped = /** @type {Record<number, string>} */ (NUMERIC_C1_REMAP)[
3148
+ code
3149
+ ];
3150
+ if (remapped !== undefined) return remapped;
3151
+ }
3152
+ return String.fromCodePoint(code);
3153
+ };
3154
+
3155
+ /**
3156
+ * Decode HTML character references in a string. Handles all numeric
3157
+ * references (with WHATWG remap of 0x00, surrogates, out-of-range, and the
3158
+ * C1 Windows-1252 table) and the full WHATWG named character references
3159
+ * table. Unknown or malformed references are left as literal text.
3160
+ *
3161
+ * When `isAttribute` is `true`, applies the WHATWG
3162
+ * "consumed-as-part-of-an-attribute" rule: a named reference without a
3163
+ * trailing `;` whose next character is `=` or ASCII alphanumeric is left
3164
+ * undecoded, so e.g. `&amp=foo` stays literal in an attribute value but
3165
+ * decodes to `&=foo` in text.
2701
3166
  * @param {string} str the raw string from the token slice
3167
+ * @param {boolean=} isAttribute true if `str` came from an attribute value
2702
3168
  * @returns {string} decoded string
2703
3169
  */
2704
- walkHtmlTokens.decodeHtmlEntities = (str) => {
3170
+ walkHtmlTokens.decodeHtmlEntities = (str, isAttribute) => {
2705
3171
  if (!str.includes("&")) return str;
2706
3172
 
2707
- return str.replace(/&(#?[0-9a-zA-Z]+);?/g, (match, entity) => {
2708
- // Decimal numeric reference: &#65;
2709
- if (entity.charCodeAt(0) === 0x23 /* # */) {
2710
- const isHex =
2711
- entity.charCodeAt(1) === 0x78 || entity.charCodeAt(1) === 0x58; // x or X
2712
- const code = isHex
2713
- ? Number.parseInt(entity.slice(2), 16)
2714
- : Number.parseInt(entity.slice(1), 10);
2715
- if (!Number.isNaN(code)) {
2716
- // Handle basic out-of-bounds (minimal approximation of WHATWG replacement char)
2717
- return code > 0x10ffff ? "\uFFFD" : String.fromCodePoint(code);
3173
+ // Match one of three forms (each with an optional trailing `;`):
3174
+ // `&#x<hex>` - hex numeric reference (requires the `x`/`X`).
3175
+ // `&#<dec>` - decimal numeric reference (digits only).
3176
+ // `&<name>` - named reference (letter followed by alphanumerics).
3177
+ // The three alternatives are kept separate so a decimal reference like
3178
+ // `&#65b` doesn't greedily eat the trailing `b` as if it were hex.
3179
+ return str.replace(
3180
+ /&(?:#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z][a-zA-Z0-9]*);?/g,
3181
+ (match, offset, source) => {
3182
+ // Numeric reference: &#65; or &#x41;
3183
+ if (match.charCodeAt(1) === 0x23 /* # */) {
3184
+ const lastChar = match.charAt(match.length - 1);
3185
+ const isHex =
3186
+ match.charCodeAt(2) === 0x78 || match.charCodeAt(2) === 0x58;
3187
+ const body = isHex
3188
+ ? lastChar === ";"
3189
+ ? match.slice(3, -1)
3190
+ : match.slice(3)
3191
+ : lastChar === ";"
3192
+ ? match.slice(2, -1)
3193
+ : match.slice(2);
3194
+ // The regex above guarantees at least one digit in `body`,
3195
+ // so `parseInt` always returns a finite number here.
3196
+ return decodeNumericReference(Number.parseInt(body, isHex ? 16 : 10));
2718
3197
  }
2719
- return match; // Invalid numeric (e.g. &#;)
2720
- }
2721
3198
 
2722
- // Known minimal named reference: &amp;
2723
- const key = entity.toLowerCase();
2724
- if (Object.prototype.hasOwnProperty.call(MINIMAL_ENTITIES, key)) {
2725
- return /** @type {Record<string, string>} */ (MINIMAL_ENTITIES)[key];
2726
- }
3199
+ // Named reference. Try the full captured name first, then
3200
+ // progressively shorter prefixes - this handles direct matches
3201
+ // like `&amp;` as well as WHATWG longest-prefix semantics where
3202
+ // e.g. `&notpre;` decodes as `&not` (a legacy bare entity)
3203
+ // followed by `pre;` as literal text.
3204
+ const name = match.slice(1);
3205
+ const matchEndsWithSemi = name.charCodeAt(name.length - 1) === 0x3b;
3206
+
3207
+ // Attribute-context guard: if the entity match didn't end with `;`
3208
+ // and the next character in the source is `=` or ASCII
3209
+ // alphanumeric, the WHATWG spec says to flush the literal text
3210
+ // rather than decode. The greedy regex already absorbed any
3211
+ // trailing alphanumerics, so the only candidate "next char" here
3212
+ // is `=` (or any non-alphanumeric).
3213
+ if (isAttribute && !matchEndsWithSemi) {
3214
+ const after = source.charCodeAt(offset + match.length);
3215
+ if (after === 0x3d /* = */) return match;
3216
+ }
2727
3217
 
2728
- // Unknown named entity: preserve as literal to avoid data loss
2729
- return match;
2730
- });
3218
+ // Cap the longest-prefix search at MAX_ENTITY_NAME_LEN so pathological
3219
+ // inputs like `&` + thousands of alphanumerics stay linear-time.
3220
+ // Anything past that cap can't possibly match and is appended
3221
+ // verbatim as part of `name.slice(i)`.
3222
+ const searchLen =
3223
+ name.length > MAX_ENTITY_NAME_LEN ? MAX_ENTITY_NAME_LEN : name.length;
3224
+ for (let i = searchLen; i > 0; i--) {
3225
+ const prefix = name.slice(0, i);
3226
+ if (HTML_ENTITIES[prefix] !== undefined) {
3227
+ // Attribute-context longest-prefix guard: if the matched
3228
+ // prefix doesn't end with `;` and the leftover starts with
3229
+ // an alphanumeric character, leave literal per WHATWG.
3230
+ // (The regex greedy-consumes alphanumerics, so any leftover
3231
+ // within `name` is itself alphanumeric — we only need to
3232
+ // check non-empty leftover here; the `=` case is handled
3233
+ // above against the source character after the match.)
3234
+ if (
3235
+ isAttribute &&
3236
+ i < name.length &&
3237
+ prefix.charCodeAt(prefix.length - 1) !== 0x3b
3238
+ ) {
3239
+ return match;
3240
+ }
3241
+ return HTML_ENTITIES[prefix] + name.slice(i);
3242
+ }
3243
+ }
3244
+ return match;
3245
+ }
3246
+ );
2731
3247
  };
2732
3248
 
2733
3249
  module.exports = walkHtmlTokens;