webpack 5.106.2 → 5.107.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235)
  1. package/README.md +2 -2
  2. package/lib/APIPlugin.js +1 -1
  3. package/lib/BannerPlugin.js +3 -4
  4. package/lib/Cache.js +3 -6
  5. package/lib/Chunk.js +21 -25
  6. package/lib/ChunkGroup.js +57 -15
  7. package/lib/CompatibilityPlugin.js +8 -7
  8. package/lib/Compilation.js +67 -37
  9. package/lib/Compiler.js +4 -13
  10. package/lib/ContextModule.js +2 -2
  11. package/lib/DefinePlugin.js +2 -2
  12. package/lib/Dependency.js +22 -1
  13. package/lib/DependencyTemplate.js +2 -1
  14. package/lib/EnvironmentPlugin.js +1 -1
  15. package/lib/EvalSourceMapDevToolPlugin.js +8 -10
  16. package/lib/ExportsInfo.js +30 -34
  17. package/lib/ExternalModule.js +91 -26
  18. package/lib/ExternalModuleFactoryPlugin.js +7 -1
  19. package/lib/FileSystemInfo.js +187 -72
  20. package/lib/Generator.js +3 -3
  21. package/lib/HotModuleReplacementPlugin.js +26 -8
  22. package/lib/IgnorePlugin.js +2 -1
  23. package/lib/Module.js +20 -19
  24. package/lib/ModuleFactory.js +1 -1
  25. package/lib/ModuleNotFoundError.js +3 -84
  26. package/lib/ModuleSourceTypeConstants.js +51 -19
  27. package/lib/ModuleTypeConstants.js +12 -3
  28. package/lib/MultiCompiler.js +2 -2
  29. package/lib/NodeStuffPlugin.js +1 -1
  30. package/lib/NormalModule.js +119 -77
  31. package/lib/NormalModuleFactory.js +47 -27
  32. package/lib/Parser.js +1 -1
  33. package/lib/ProgressPlugin.js +129 -56
  34. package/lib/RuntimeGlobals.js +5 -5
  35. package/lib/RuntimeModule.js +9 -7
  36. package/lib/RuntimePlugin.js +12 -1
  37. package/lib/SourceMapDevToolPlugin.js +250 -49
  38. package/lib/Template.js +1 -1
  39. package/lib/TemplatedPathPlugin.js +22 -4
  40. package/lib/WarnCaseSensitiveModulesPlugin.js +70 -2
  41. package/lib/WarnDeprecatedOptionPlugin.js +1 -1
  42. package/lib/WarnNoModeSetPlugin.js +16 -1
  43. package/lib/Watching.js +2 -3
  44. package/lib/WebpackError.js +3 -77
  45. package/lib/WebpackIsIncludedPlugin.js +1 -1
  46. package/lib/WebpackOptionsApply.js +13 -1
  47. package/lib/asset/AssetBytesGenerator.js +12 -8
  48. package/lib/asset/AssetGenerator.js +36 -22
  49. package/lib/asset/AssetModulesPlugin.js +6 -8
  50. package/lib/asset/AssetSourceGenerator.js +12 -8
  51. package/lib/buildChunkGraph.js +4 -6
  52. package/lib/cache/PackFileCacheStrategy.js +4 -4
  53. package/lib/cli.js +3 -1
  54. package/lib/config/defaults.js +197 -10
  55. package/lib/config/normalization.js +3 -1
  56. package/lib/css/CssGenerator.js +320 -105
  57. package/lib/css/CssInjectStyleRuntimeModule.js +44 -42
  58. package/lib/css/CssLoadingRuntimeModule.js +22 -4
  59. package/lib/{CssModule.js → css/CssModule.js} +15 -15
  60. package/lib/css/CssModulesPlugin.js +168 -88
  61. package/lib/css/CssParser.js +566 -269
  62. package/lib/css/walkCssTokens.js +148 -2
  63. package/lib/dependencies/AMDRequireDependenciesBlockParserPlugin.js +1 -1
  64. package/lib/dependencies/CommonJsDependencyHelpers.js +63 -0
  65. package/lib/dependencies/CommonJsExportRequireDependency.js +54 -10
  66. package/lib/dependencies/CommonJsExportsParserPlugin.js +1 -1
  67. package/lib/dependencies/CommonJsFullRequireDependency.js +32 -9
  68. package/lib/dependencies/CommonJsImportsParserPlugin.js +112 -4
  69. package/lib/dependencies/CommonJsRequireDependency.js +67 -4
  70. package/lib/dependencies/ContextDependency.js +1 -1
  71. package/lib/dependencies/ContextDependencyHelpers.js +1 -1
  72. package/lib/dependencies/CreateRequireParserPlugin.js +1 -1
  73. package/lib/dependencies/CriticalDependencyWarning.js +1 -1
  74. package/lib/dependencies/CssIcssExportDependency.js +332 -67
  75. package/lib/dependencies/CssIcssImportDependency.js +49 -7
  76. package/lib/dependencies/CssIcssSymbolDependency.js +11 -3
  77. package/lib/dependencies/CssImportDependency.js +8 -0
  78. package/lib/dependencies/CssUrlDependency.js +28 -2
  79. package/lib/dependencies/HarmonyDetectionParserPlugin.js +22 -2
  80. package/lib/dependencies/HarmonyExportDependencyParserPlugin.js +8 -7
  81. package/lib/dependencies/HarmonyExportExpressionDependency.js +22 -14
  82. package/lib/dependencies/HarmonyExportImportedSpecifierDependency.js +110 -3
  83. package/lib/dependencies/HarmonyImportDependency.js +10 -2
  84. package/lib/dependencies/HarmonyImportDependencyParserPlugin.js +22 -1
  85. package/lib/dependencies/HarmonyImportSpecifierDependency.js +1 -1
  86. package/lib/{HarmonyLinkingError.js → dependencies/HarmonyLinkingError.js} +5 -3
  87. package/lib/dependencies/HtmlInlineScriptDependency.js +133 -0
  88. package/lib/dependencies/HtmlInlineStyleDependency.js +101 -0
  89. package/lib/dependencies/HtmlScriptSrcDependency.js +557 -0
  90. package/lib/dependencies/HtmlSourceDependency.js +128 -0
  91. package/lib/dependencies/ImportMetaContextDependencyParserPlugin.js +1 -1
  92. package/lib/dependencies/ImportParserPlugin.js +2 -2
  93. package/lib/dependencies/ImportPhase.js +1 -1
  94. package/lib/dependencies/RequireIncludeDependencyParserPlugin.js +1 -1
  95. package/lib/{RequireJsStuffPlugin.js → dependencies/RequireJsStuffPlugin.js} +7 -7
  96. package/lib/dependencies/SystemPlugin.js +1 -1
  97. package/lib/dependencies/WebAssemblyImportDependency.js +1 -1
  98. package/lib/dependencies/WorkerPlugin.js +2 -2
  99. package/lib/{DelegatedModule.js → dll/DelegatedModule.js} +31 -31
  100. package/lib/{DelegatedModuleFactoryPlugin.js → dll/DelegatedModuleFactoryPlugin.js} +4 -4
  101. package/lib/{DelegatedPlugin.js → dll/DelegatedPlugin.js} +2 -2
  102. package/lib/{DllEntryPlugin.js → dll/DllEntryPlugin.js} +4 -4
  103. package/lib/{DllModule.js → dll/DllModule.js} +24 -24
  104. package/lib/{DllModuleFactory.js → dll/DllModuleFactory.js} +4 -4
  105. package/lib/{DllPlugin.js → dll/DllPlugin.js} +6 -5
  106. package/lib/{DllReferencePlugin.js → dll/DllReferencePlugin.js} +14 -14
  107. package/lib/{LibManifestPlugin.js → dll/LibManifestPlugin.js} +9 -9
  108. package/lib/{AsyncDependencyToInitialChunkError.js → errors/AsyncDependencyToInitialChunkError.js} +2 -2
  109. package/lib/errors/BuildCycleError.js +1 -1
  110. package/lib/{ChunkRenderError.js → errors/ChunkRenderError.js} +1 -1
  111. package/lib/{CodeGenerationError.js → errors/CodeGenerationError.js} +1 -1
  112. package/lib/{CommentCompilationWarning.js → errors/CommentCompilationWarning.js} +3 -3
  113. package/lib/{ConcurrentCompilationError.js → errors/ConcurrentCompilationError.js} +4 -2
  114. package/lib/{EnvironmentNotSupportAsyncWarning.js → errors/EnvironmentNotSupportAsyncWarning.js} +4 -4
  115. package/lib/{HookWebpackError.js → errors/HookWebpackError.js} +5 -5
  116. package/lib/{IgnoreErrorModuleFactory.js → errors/IgnoreErrorModuleFactory.js} +4 -4
  117. package/lib/{InvalidDependenciesModuleWarning.js → errors/InvalidDependenciesModuleWarning.js} +3 -3
  118. package/lib/errors/JSONParseError.js +114 -0
  119. package/lib/{ModuleBuildError.js → errors/ModuleBuildError.js} +5 -5
  120. package/lib/{ModuleDependencyError.js → errors/ModuleDependencyError.js} +2 -2
  121. package/lib/{ModuleDependencyWarning.js → errors/ModuleDependencyWarning.js} +4 -4
  122. package/lib/{ModuleError.js → errors/ModuleError.js} +5 -5
  123. package/lib/{ModuleHashingError.js → errors/ModuleHashingError.js} +1 -1
  124. package/lib/errors/ModuleNotFoundError.js +91 -0
  125. package/lib/{ModuleParseError.js → errors/ModuleParseError.js} +8 -6
  126. package/lib/{ModuleRestoreError.js → errors/ModuleRestoreError.js} +1 -1
  127. package/lib/{ModuleStoreError.js → errors/ModuleStoreError.js} +1 -1
  128. package/lib/{ModuleWarning.js → errors/ModuleWarning.js} +5 -5
  129. package/lib/{NodeStuffInWebError.js → errors/NodeStuffInWebError.js} +4 -4
  130. package/lib/errors/NonErrorEmittedError.js +28 -0
  131. package/lib/{UnhandledSchemeError.js → errors/UnhandledSchemeError.js} +2 -2
  132. package/lib/{UnsupportedFeatureWarning.js → errors/UnsupportedFeatureWarning.js} +3 -3
  133. package/lib/errors/WebpackError.js +84 -0
  134. package/lib/html/HtmlGenerator.js +379 -0
  135. package/lib/html/HtmlModulesPlugin.js +429 -0
  136. package/lib/html/HtmlParser.js +1489 -0
  137. package/lib/html/walkHtmlTokens.js +3249 -0
  138. package/lib/ids/IdHelpers.js +2 -1
  139. package/lib/index.js +36 -15
  140. package/lib/javascript/JavascriptModulesPlugin.js +91 -10
  141. package/lib/javascript/JavascriptParser.js +197 -16
  142. package/lib/javascript/JavascriptParserHelpers.js +1 -1
  143. package/lib/json/JsonParser.js +7 -16
  144. package/lib/library/AbstractLibraryPlugin.js +1 -1
  145. package/lib/library/EnableLibraryPlugin.js +1 -1
  146. package/lib/{FalseIIFEUmdWarning.js → library/FalseIIFEUmdWarning.js} +1 -1
  147. package/lib/library/ModuleLibraryPlugin.js +74 -0
  148. package/lib/node/NodeEnvironmentPlugin.js +4 -2
  149. package/lib/node/nodeConsole.js +113 -64
  150. package/lib/optimize/ConcatenatedModule.js +51 -6
  151. package/lib/optimize/InnerGraph.js +1 -1
  152. package/lib/optimize/InnerGraphPlugin.js +11 -1
  153. package/lib/optimize/MinMaxSizeWarning.js +4 -4
  154. package/lib/optimize/ModuleConcatenationPlugin.js +15 -7
  155. package/lib/optimize/RealContentHashPlugin.js +89 -26
  156. package/lib/optimize/SideEffectsFlagPlugin.js +112 -5
  157. package/lib/optimize/SplitChunksPlugin.js +5 -5
  158. package/lib/performance/AssetsOverSizeLimitWarning.js +2 -2
  159. package/lib/performance/EntrypointsOverSizeLimitWarning.js +2 -2
  160. package/lib/performance/NoAsyncChunksWarning.js +5 -3
  161. package/lib/performance/SizeLimitsPlugin.js +1 -1
  162. package/lib/prefetch/ChunkPrefetchTriggerRuntimeModule.js +4 -1
  163. package/lib/rules/UseEffectRulePlugin.js +4 -3
  164. package/lib/runtime/AutoPublicPathRuntimeModule.js +3 -3
  165. package/lib/runtime/GetChunkFilenameRuntimeModule.js +5 -5
  166. package/lib/runtime/MakeDeferredNamespaceObjectRuntime.js +119 -13
  167. package/lib/runtime/SetAnonymousDefaultNameRuntimeModule.js +35 -0
  168. package/lib/schemes/DataUriPlugin.js +13 -1
  169. package/lib/schemes/VirtualUrlPlugin.js +1 -1
  170. package/lib/serialization/SerializerMiddleware.js +2 -2
  171. package/lib/sharing/ConsumeSharedPlugin.js +4 -10
  172. package/lib/sharing/ConsumeSharedRuntimeModule.js +8 -4
  173. package/lib/sharing/ProvideSharedModule.js +1 -1
  174. package/lib/sharing/ProvideSharedPlugin.js +5 -5
  175. package/lib/sharing/resolveMatchedConfigs.js +1 -1
  176. package/lib/stats/DefaultStatsFactoryPlugin.js +2 -2
  177. package/lib/stats/DefaultStatsPresetPlugin.js +1 -1
  178. package/lib/stats/DefaultStatsPrinterPlugin.js +1 -1
  179. package/lib/stats/StatsFactory.js +1 -1
  180. package/lib/typescript/TypeScriptPlugin.js +210 -0
  181. package/lib/url/URLParserPlugin.js +2 -2
  182. package/lib/util/AsyncQueue.js +2 -2
  183. package/lib/util/Hash.js +2 -2
  184. package/lib/util/LocConverter.js +53 -0
  185. package/lib/util/SortableSet.js +1 -1
  186. package/lib/util/cleverMerge.js +2 -2
  187. package/lib/util/comparators.js +3 -3
  188. package/lib/util/concatenate.js +3 -3
  189. package/lib/util/conventions.js +42 -1
  190. package/lib/util/createMappings.js +118 -0
  191. package/lib/{formatLocation.js → util/formatLocation.js} +2 -2
  192. package/lib/{SizeFormatHelpers.js → util/formatSize.js} +3 -1
  193. package/lib/util/fs.js +8 -8
  194. package/lib/util/hash/md4.js +1 -1
  195. package/lib/util/hash/xxhash64.js +1 -1
  196. package/lib/util/identifier.js +48 -0
  197. package/lib/util/internalSerializables.js +35 -19
  198. package/lib/util/magicComment.js +10 -7
  199. package/lib/util/parseJson.js +2 -73
  200. package/lib/util/source.js +21 -0
  201. package/lib/util/topologicalSort.js +69 -0
  202. package/lib/wasm-async/AsyncWebAssemblyModulesPlugin.js +3 -4
  203. package/lib/wasm-async/AsyncWebAssemblyParser.js +1 -1
  204. package/lib/wasm-sync/UnsupportedWebAssemblyFeatureError.js +5 -3
  205. package/lib/wasm-sync/WasmFinalizeExportsPlugin.js +1 -1
  206. package/lib/wasm-sync/WebAssemblyInInitialChunkError.js +5 -3
  207. package/lib/webpack.js +3 -1
  208. package/package.json +24 -22
  209. package/schemas/WebpackOptions.check.js +1 -1
  210. package/schemas/WebpackOptions.json +129 -12
  211. package/schemas/plugins/{DllPlugin.check.d.ts → HtmlGeneratorOptions.check.d.ts} +1 -1
  212. package/schemas/plugins/HtmlGeneratorOptions.check.js +6 -0
  213. package/schemas/plugins/HtmlGeneratorOptions.json +3 -0
  214. package/schemas/plugins/ProgressPlugin.check.js +1 -1
  215. package/schemas/plugins/ProgressPlugin.json +22 -0
  216. package/schemas/plugins/container/ContainerReferencePlugin.check.js +1 -1
  217. package/schemas/plugins/container/ContainerReferencePlugin.json +1 -0
  218. package/schemas/plugins/container/ExternalsType.check.js +1 -1
  219. package/schemas/plugins/container/ModuleFederationPlugin.check.js +1 -1
  220. package/schemas/plugins/container/ModuleFederationPlugin.json +1 -0
  221. package/schemas/plugins/{DllReferencePlugin.check.d.ts → css/CssAutoOrModuleParserOptions.check.d.ts} +1 -1
  222. package/schemas/plugins/css/CssAutoOrModuleParserOptions.check.js +6 -0
  223. package/schemas/plugins/css/CssAutoOrModuleParserOptions.json +3 -0
  224. package/schemas/plugins/dll/DllPlugin.check.d.ts +7 -0
  225. package/schemas/plugins/dll/DllReferencePlugin.check.d.ts +7 -0
  226. package/types.d.ts +1153 -233
  227. package/lib/CaseSensitiveModulesWarning.js +0 -80
  228. package/lib/GraphHelpers.js +0 -49
  229. package/lib/NoModeWarning.js +0 -23
  230. package/lib/css/CssMergeStyleSheetsRuntimeModule.js +0 -57
  231. /package/lib/{AbstractMethodError.js → errors/AbstractMethodError.js} +0 -0
  232. /package/schemas/plugins/{DllPlugin.check.js → dll/DllPlugin.check.js} +0 -0
  233. /package/schemas/plugins/{DllPlugin.json → dll/DllPlugin.json} +0 -0
  234. /package/schemas/plugins/{DllReferencePlugin.check.js → dll/DllReferencePlugin.check.js} +0 -0
  235. /package/schemas/plugins/{DllReferencePlugin.json → dll/DllReferencePlugin.json} +0 -0
@@ -0,0 +1,3249 @@
1
+ /*
2
+ MIT License http://www.opensource.org/licenses/mit-license.php
3
+ Author Raj Aryan (based on SWC parser by Alexander Akait)
4
+ */
5
+
6
+ "use strict";
7
+
8
+ // cspell:ignore apos notpre noncharacters DFFF
9
+
10
+ // #region html entities
11
+ // The contents of this region are auto-generated by
12
+ // `tooling/generate-html-entities.js` from `tooling/html-entities.json`.
13
+ // Do not edit by hand — re-run the generator (via `yarn fix:special`) to refresh.
14
+ //
15
+ // WHATWG named character references. Keys are entity names WITHOUT the
16
+ // leading `&` (some end with `;`, others omit it for legacy entities that
17
+ // match without a closing semicolon). Values are the decoded character
18
+ // strings (1–2 UTF-16 code units).
19
+ // Built on a null prototype so bracket lookups (`HTML_ENTITIES[name]`)
20
+ // can't be poisoned by inherited `Object.prototype` keys like `toString`,
21
+ // `constructor`, or `__proto__` — without this, `&toString;` would falsely
22
+ // look like a matched named character reference.
23
+ // prettier-ignore
24
+ // cspell:disable-next-line
25
+ const HTML_ENTITIES = /** @type {Readonly<Record<string, string>>} */ (Object.freeze(Object.assign(Object.create(null), {"AElig":"Æ","AElig;":"Æ","AMP":"&","AMP;":"&","Aacute":"Á","Aacute;":"Á","Abreve;":"Ă","Acirc":"Â","Acirc;":"Â","Acy;":"А","Afr;":"𝔄","Agrave":"À","Agrave;":"À","Alpha;":"Α","Amacr;":"Ā","And;":"⩓","Aogon;":"Ą","Aopf;":"𝔸","ApplyFunction;":"⁡","Aring":"Å","Aring;":"Å","Ascr;":"𝒜","Assign;":"≔","Atilde":"Ã","Atilde;":"Ã","Auml":"Ä","Auml;":"Ä","Backslash;":"∖","Barv;":"⫧","Barwed;":"⌆","Bcy;":"Б","Because;":"∵","Bernoullis;":"ℬ","Beta;":"Β","Bfr;":"𝔅","Bopf;":"𝔹","Breve;":"˘","Bscr;":"ℬ","Bumpeq;":"≎","CHcy;":"Ч","COPY":"©","COPY;":"©","Cacute;":"Ć","Cap;":"⋒","CapitalDifferentialD;":"ⅅ","Cayleys;":"ℭ","Ccaron;":"Č","Ccedil":"Ç","Ccedil;":"Ç","Ccirc;":"Ĉ","Cconint;":"∰","Cdot;":"Ċ","Cedilla;":"¸","CenterDot;":"·","Cfr;":"ℭ","Chi;":"Χ","CircleDot;":"⊙","CircleMinus;":"⊖","CirclePlus;":"⊕","CircleTimes;":"⊗","ClockwiseContourIntegral;":"∲","CloseCurlyDoubleQuote;":"”","CloseCurlyQuote;":"’","Colon;":"∷","Colone;":"⩴","Congruent;":"≡","Conint;":"∯","ContourIntegral;":"∮","Copf;":"ℂ","Coproduct;":"∐","CounterClockwiseContourIntegral;":"∳","Cross;":"⨯","Cscr;":"𝒞","Cup;":"⋓","CupCap;":"≍","DD;":"ⅅ","DDotrahd;":"⤑","DJcy;":"Ђ","DScy;":"Ѕ","DZcy;":"Џ","Dagger;":"‡","Darr;":"↡","Dashv;":"⫤","Dcaron;":"Ď","Dcy;":"Д","Del;":"∇","Delta;":"Δ","Dfr;":"𝔇","DiacriticalAcute;":"´","DiacriticalDot;":"˙","DiacriticalDoubleAcute;":"˝","DiacriticalGrave;":"`","DiacriticalTilde;":"˜","Diamond;":"⋄","DifferentialD;":"ⅆ","Dopf;":"𝔻","Dot;":"¨","DotDot;":"⃜","DotEqual;":"≐","DoubleContourIntegral;":"∯","DoubleDot;":"¨","DoubleDownArrow;":"⇓","DoubleLeftArrow;":"⇐","DoubleLeftRightArrow;":"⇔","DoubleLeftTee;":"⫤","DoubleLongLeftArrow;":"⟸","DoubleLongLeftRightArrow;":"⟺","DoubleLongRightArrow;":"⟹","DoubleRightArrow;":"⇒","DoubleRightTee;":"⊨","DoubleUpArrow;":"⇑","DoubleUpDownArrow;":"⇕","DoubleVerticalBar;":"∥","DownArrow;":"↓","DownArrowBar;":"⤓","DownArrowUpArrow;":"
⇵","DownBreve;":"̑","DownLeftRightVector;":"⥐","DownLeftTeeVector;":"⥞","DownLeftVector;":"↽","DownLeftVectorBar;":"⥖","DownRightTeeVector;":"⥟","DownRightVector;":"⇁","DownRightVectorBar;":"⥗","DownTee;":"⊤","DownTeeArrow;":"↧","Downarrow;":"⇓","Dscr;":"𝒟","Dstrok;":"Đ","ENG;":"Ŋ","ETH":"Ð","ETH;":"Ð","Eacute":"É","Eacute;":"É","Ecaron;":"Ě","Ecirc":"Ê","Ecirc;":"Ê","Ecy;":"Э","Edot;":"Ė","Efr;":"𝔈","Egrave":"È","Egrave;":"È","Element;":"∈","Emacr;":"Ē","EmptySmallSquare;":"◻","EmptyVerySmallSquare;":"▫","Eogon;":"Ę","Eopf;":"𝔼","Epsilon;":"Ε","Equal;":"⩵","EqualTilde;":"≂","Equilibrium;":"⇌","Escr;":"ℰ","Esim;":"⩳","Eta;":"Η","Euml":"Ë","Euml;":"Ë","Exists;":"∃","ExponentialE;":"ⅇ","Fcy;":"Ф","Ffr;":"𝔉","FilledSmallSquare;":"◼","FilledVerySmallSquare;":"▪","Fopf;":"𝔽","ForAll;":"∀","Fouriertrf;":"ℱ","Fscr;":"ℱ","GJcy;":"Ѓ","GT":">","GT;":">","Gamma;":"Γ","Gammad;":"Ϝ","Gbreve;":"Ğ","Gcedil;":"Ģ","Gcirc;":"Ĝ","Gcy;":"Г","Gdot;":"Ġ","Gfr;":"𝔊","Gg;":"⋙","Gopf;":"𝔾","GreaterEqual;":"≥","GreaterEqualLess;":"⋛","GreaterFullEqual;":"≧","GreaterGreater;":"⪢","GreaterLess;":"≷","GreaterSlantEqual;":"⩾","GreaterTilde;":"≳","Gscr;":"𝒢","Gt;":"≫","HARDcy;":"Ъ","Hacek;":"ˇ","Hat;":"^","Hcirc;":"Ĥ","Hfr;":"ℌ","HilbertSpace;":"ℋ","Hopf;":"ℍ","HorizontalLine;":"─","Hscr;":"ℋ","Hstrok;":"Ħ","HumpDownHump;":"≎","HumpEqual;":"≏","IEcy;":"Е","IJlig;":"IJ","IOcy;":"Ё","Iacute":"Í","Iacute;":"Í","Icirc":"Î","Icirc;":"Î","Icy;":"И","Idot;":"İ","Ifr;":"ℑ","Igrave":"Ì","Igrave;":"Ì","Im;":"ℑ","Imacr;":"Ī","ImaginaryI;":"ⅈ","Implies;":"⇒","Int;":"∬","Integral;":"∫","Intersection;":"⋂","InvisibleComma;":"⁣","InvisibleTimes;":"⁢","Iogon;":"Į","Iopf;":"𝕀","Iota;":"Ι","Iscr;":"ℐ","Itilde;":"Ĩ","Iukcy;":"І","Iuml":"Ï","Iuml;":"Ï","Jcirc;":"Ĵ","Jcy;":"Й","Jfr;":"𝔍","Jopf;":"𝕁","Jscr;":"𝒥","Jsercy;":"Ј","Jukcy;":"Є","KHcy;":"Х","KJcy;":"Ќ","Kappa;":"Κ","Kcedil;":"Ķ","Kcy;":"К","Kfr;":"𝔎","Kopf;":"𝕂","Kscr;":"𝒦","LJcy;":"Љ","LT":"<","LT;":"<","Lacute;":"Ĺ","Lambda;":"Λ","Lang;":"⟪","Laplacetrf;":
"ℒ","Larr;":"↞","Lcaron;":"Ľ","Lcedil;":"Ļ","Lcy;":"Л","LeftAngleBracket;":"⟨","LeftArrow;":"←","LeftArrowBar;":"⇤","LeftArrowRightArrow;":"⇆","LeftCeiling;":"⌈","LeftDoubleBracket;":"⟦","LeftDownTeeVector;":"⥡","LeftDownVector;":"⇃","LeftDownVectorBar;":"⥙","LeftFloor;":"⌊","LeftRightArrow;":"↔","LeftRightVector;":"⥎","LeftTee;":"⊣","LeftTeeArrow;":"↤","LeftTeeVector;":"⥚","LeftTriangle;":"⊲","LeftTriangleBar;":"⧏","LeftTriangleEqual;":"⊴","LeftUpDownVector;":"⥑","LeftUpTeeVector;":"⥠","LeftUpVector;":"↿","LeftUpVectorBar;":"⥘","LeftVector;":"↼","LeftVectorBar;":"⥒","Leftarrow;":"⇐","Leftrightarrow;":"⇔","LessEqualGreater;":"⋚","LessFullEqual;":"≦","LessGreater;":"≶","LessLess;":"⪡","LessSlantEqual;":"⩽","LessTilde;":"≲","Lfr;":"𝔏","Ll;":"⋘","Lleftarrow;":"⇚","Lmidot;":"Ŀ","LongLeftArrow;":"⟵","LongLeftRightArrow;":"⟷","LongRightArrow;":"⟶","Longleftarrow;":"⟸","Longleftrightarrow;":"⟺","Longrightarrow;":"⟹","Lopf;":"𝕃","LowerLeftArrow;":"↙","LowerRightArrow;":"↘","Lscr;":"ℒ","Lsh;":"↰","Lstrok;":"Ł","Lt;":"≪","Map;":"⤅","Mcy;":"М","MediumSpace;":" ","Mellintrf;":"ℳ","Mfr;":"𝔐","MinusPlus;":"∓","Mopf;":"𝕄","Mscr;":"ℳ","Mu;":"Μ","NJcy;":"Њ","Nacute;":"Ń","Ncaron;":"Ň","Ncedil;":"Ņ","Ncy;":"Н","NegativeMediumSpace;":"​","NegativeThickSpace;":"​","NegativeThinSpace;":"​","NegativeVeryThinSpace;":"​","NestedGreaterGreater;":"≫","NestedLessLess;":"≪","NewLine;":"\n","Nfr;":"𝔑","NoBreak;":"⁠","NonBreakingSpace;":" 
","Nopf;":"ℕ","Not;":"⫬","NotCongruent;":"≢","NotCupCap;":"≭","NotDoubleVerticalBar;":"∦","NotElement;":"∉","NotEqual;":"≠","NotEqualTilde;":"≂̸","NotExists;":"∄","NotGreater;":"≯","NotGreaterEqual;":"≱","NotGreaterFullEqual;":"≧̸","NotGreaterGreater;":"≫̸","NotGreaterLess;":"≹","NotGreaterSlantEqual;":"⩾̸","NotGreaterTilde;":"≵","NotHumpDownHump;":"≎̸","NotHumpEqual;":"≏̸","NotLeftTriangle;":"⋪","NotLeftTriangleBar;":"⧏̸","NotLeftTriangleEqual;":"⋬","NotLess;":"≮","NotLessEqual;":"≰","NotLessGreater;":"≸","NotLessLess;":"≪̸","NotLessSlantEqual;":"⩽̸","NotLessTilde;":"≴","NotNestedGreaterGreater;":"⪢̸","NotNestedLessLess;":"⪡̸","NotPrecedes;":"⊀","NotPrecedesEqual;":"⪯̸","NotPrecedesSlantEqual;":"⋠","NotReverseElement;":"∌","NotRightTriangle;":"⋫","NotRightTriangleBar;":"⧐̸","NotRightTriangleEqual;":"⋭","NotSquareSubset;":"⊏̸","NotSquareSubsetEqual;":"⋢","NotSquareSuperset;":"⊐̸","NotSquareSupersetEqual;":"⋣","NotSubset;":"⊂⃒","NotSubsetEqual;":"⊈","NotSucceeds;":"⊁","NotSucceedsEqual;":"⪰̸","NotSucceedsSlantEqual;":"⋡","NotSucceedsTilde;":"≿̸","NotSuperset;":"⊃⃒","NotSupersetEqual;":"⊉","NotTilde;":"≁","NotTildeEqual;":"≄","NotTildeFullEqual;":"≇","NotTildeTilde;":"≉","NotVerticalBar;":"∤","Nscr;":"𝒩","Ntilde":"Ñ","Ntilde;":"Ñ","Nu;":"Ν","OElig;":"Œ","Oacute":"Ó","Oacute;":"Ó","Ocirc":"Ô","Ocirc;":"Ô","Ocy;":"О","Odblac;":"Ő","Ofr;":"𝔒","Ograve":"Ò","Ograve;":"Ò","Omacr;":"Ō","Omega;":"Ω","Omicron;":"Ο","Oopf;":"𝕆","OpenCurlyDoubleQuote;":"“","OpenCurlyQuote;":"‘","Or;":"⩔","Oscr;":"𝒪","Oslash":"Ø","Oslash;":"Ø","Otilde":"Õ","Otilde;":"Õ","Otimes;":"⨷","Ouml":"Ö","Ouml;":"Ö","OverBar;":"‾","OverBrace;":"⏞","OverBracket;":"⎴","OverParenthesis;":"⏜","PartialD;":"∂","Pcy;":"П","Pfr;":"𝔓","Phi;":"Φ","Pi;":"Π","PlusMinus;":"±","Poincareplane;":"ℌ","Popf;":"ℙ","Pr;":"⪻","Precedes;":"≺","PrecedesEqual;":"⪯","PrecedesSlantEqual;":"≼","PrecedesTilde;":"≾","Prime;":"″","Product;":"∏","Proportion;":"∷","Proportional;":"∝","Pscr;":"𝒫","Psi;":"Ψ","QUOT":"\"","QUOT;":"\"","Qfr;"
:"𝔔","Qopf;":"ℚ","Qscr;":"𝒬","RBarr;":"⤐","REG":"®","REG;":"®","Racute;":"Ŕ","Rang;":"⟫","Rarr;":"↠","Rarrtl;":"⤖","Rcaron;":"Ř","Rcedil;":"Ŗ","Rcy;":"Р","Re;":"ℜ","ReverseElement;":"∋","ReverseEquilibrium;":"⇋","ReverseUpEquilibrium;":"⥯","Rfr;":"ℜ","Rho;":"Ρ","RightAngleBracket;":"⟩","RightArrow;":"→","RightArrowBar;":"⇥","RightArrowLeftArrow;":"⇄","RightCeiling;":"⌉","RightDoubleBracket;":"⟧","RightDownTeeVector;":"⥝","RightDownVector;":"⇂","RightDownVectorBar;":"⥕","RightFloor;":"⌋","RightTee;":"⊢","RightTeeArrow;":"↦","RightTeeVector;":"⥛","RightTriangle;":"⊳","RightTriangleBar;":"⧐","RightTriangleEqual;":"⊵","RightUpDownVector;":"⥏","RightUpTeeVector;":"⥜","RightUpVector;":"↾","RightUpVectorBar;":"⥔","RightVector;":"⇀","RightVectorBar;":"⥓","Rightarrow;":"⇒","Ropf;":"ℝ","RoundImplies;":"⥰","Rrightarrow;":"⇛","Rscr;":"ℛ","Rsh;":"↱","RuleDelayed;":"⧴","SHCHcy;":"Щ","SHcy;":"Ш","SOFTcy;":"Ь","Sacute;":"Ś","Sc;":"⪼","Scaron;":"Š","Scedil;":"Ş","Scirc;":"Ŝ","Scy;":"С","Sfr;":"𝔖","ShortDownArrow;":"↓","ShortLeftArrow;":"←","ShortRightArrow;":"→","ShortUpArrow;":"↑","Sigma;":"Σ","SmallCircle;":"∘","Sopf;":"𝕊","Sqrt;":"√","Square;":"□","SquareIntersection;":"⊓","SquareSubset;":"⊏","SquareSubsetEqual;":"⊑","SquareSuperset;":"⊐","SquareSupersetEqual;":"⊒","SquareUnion;":"⊔","Sscr;":"𝒮","Star;":"⋆","Sub;":"⋐","Subset;":"⋐","SubsetEqual;":"⊆","Succeeds;":"≻","SucceedsEqual;":"⪰","SucceedsSlantEqual;":"≽","SucceedsTilde;":"≿","SuchThat;":"∋","Sum;":"∑","Sup;":"⋑","Superset;":"⊃","SupersetEqual;":"⊇","Supset;":"⋑","THORN":"Þ","THORN;":"Þ","TRADE;":"™","TSHcy;":"Ћ","TScy;":"Ц","Tab;":"\t","Tau;":"Τ","Tcaron;":"Ť","Tcedil;":"Ţ","Tcy;":"Т","Tfr;":"𝔗","Therefore;":"∴","Theta;":"Θ","ThickSpace;":"  ","ThinSpace;":" 
","Tilde;":"∼","TildeEqual;":"≃","TildeFullEqual;":"≅","TildeTilde;":"≈","Topf;":"𝕋","TripleDot;":"⃛","Tscr;":"𝒯","Tstrok;":"Ŧ","Uacute":"Ú","Uacute;":"Ú","Uarr;":"↟","Uarrocir;":"⥉","Ubrcy;":"Ў","Ubreve;":"Ŭ","Ucirc":"Û","Ucirc;":"Û","Ucy;":"У","Udblac;":"Ű","Ufr;":"𝔘","Ugrave":"Ù","Ugrave;":"Ù","Umacr;":"Ū","UnderBar;":"_","UnderBrace;":"⏟","UnderBracket;":"⎵","UnderParenthesis;":"⏝","Union;":"⋃","UnionPlus;":"⊎","Uogon;":"Ų","Uopf;":"𝕌","UpArrow;":"↑","UpArrowBar;":"⤒","UpArrowDownArrow;":"⇅","UpDownArrow;":"↕","UpEquilibrium;":"⥮","UpTee;":"⊥","UpTeeArrow;":"↥","Uparrow;":"⇑","Updownarrow;":"⇕","UpperLeftArrow;":"↖","UpperRightArrow;":"↗","Upsi;":"ϒ","Upsilon;":"Υ","Uring;":"Ů","Uscr;":"𝒰","Utilde;":"Ũ","Uuml":"Ü","Uuml;":"Ü","VDash;":"⊫","Vbar;":"⫫","Vcy;":"В","Vdash;":"⊩","Vdashl;":"⫦","Vee;":"⋁","Verbar;":"‖","Vert;":"‖","VerticalBar;":"∣","VerticalLine;":"|","VerticalSeparator;":"❘","VerticalTilde;":"≀","VeryThinSpace;":" ","Vfr;":"𝔙","Vopf;":"𝕍","Vscr;":"𝒱","Vvdash;":"⊪","Wcirc;":"Ŵ","Wedge;":"⋀","Wfr;":"𝔚","Wopf;":"𝕎","Wscr;":"𝒲","Xfr;":"𝔛","Xi;":"Ξ","Xopf;":"𝕏","Xscr;":"𝒳","YAcy;":"Я","YIcy;":"Ї","YUcy;":"Ю","Yacute":"Ý","Yacute;":"Ý","Ycirc;":"Ŷ","Ycy;":"Ы","Yfr;":"𝔜","Yopf;":"𝕐","Yscr;":"𝒴","Yuml;":"Ÿ","ZHcy;":"Ж","Zacute;":"Ź","Zcaron;":"Ž","Zcy;":"З","Zdot;":"Ż","ZeroWidthSpace;":"​","Zeta;":"Ζ","Zfr;":"ℨ","Zopf;":"ℤ","Zscr;":"𝒵","aacute":"á","aacute;":"á","abreve;":"ă","ac;":"∾","acE;":"∾̳","acd;":"∿","acirc":"â","acirc;":"â","acute":"´","acute;":"´","acy;":"а","aelig":"æ","aelig;":"æ","af;":"⁡","afr;":"𝔞","agrave":"à","agrave;":"à","alefsym;":"ℵ","aleph;":"ℵ","alpha;":"α","amacr;":"ā","amalg;":"⨿","amp":"&","amp;":"&","and;":"∧","andand;":"⩕","andd;":"⩜","andslope;":"⩘","andv;":"⩚","ang;":"∠","ange;":"⦤","angle;":"∠","angmsd;":"∡","angmsdaa;":"⦨","angmsdab;":"⦩","angmsdac;":"⦪","angmsdad;":"⦫","angmsdae;":"⦬","angmsdaf;":"⦭","angmsdag;":"⦮","angmsdah;":"⦯","angrt;":"∟","angrtvb;":"⊾","angrtvbd;":"⦝","angsph;":"∢","angst;":"Å","angzarr;":"⍼","aogon;"
:"ą","aopf;":"𝕒","ap;":"≈","apE;":"⩰","apacir;":"⩯","ape;":"≊","apid;":"≋","apos;":"'","approx;":"≈","approxeq;":"≊","aring":"å","aring;":"å","ascr;":"𝒶","ast;":"*","asymp;":"≈","asympeq;":"≍","atilde":"ã","atilde;":"ã","auml":"ä","auml;":"ä","awconint;":"∳","awint;":"⨑","bNot;":"⫭","backcong;":"≌","backepsilon;":"϶","backprime;":"‵","backsim;":"∽","backsimeq;":"⋍","barvee;":"⊽","barwed;":"⌅","barwedge;":"⌅","bbrk;":"⎵","bbrktbrk;":"⎶","bcong;":"≌","bcy;":"б","bdquo;":"„","becaus;":"∵","because;":"∵","bemptyv;":"⦰","bepsi;":"϶","bernou;":"ℬ","beta;":"β","beth;":"ℶ","between;":"≬","bfr;":"𝔟","bigcap;":"⋂","bigcirc;":"◯","bigcup;":"⋃","bigodot;":"⨀","bigoplus;":"⨁","bigotimes;":"⨂","bigsqcup;":"⨆","bigstar;":"★","bigtriangledown;":"▽","bigtriangleup;":"△","biguplus;":"⨄","bigvee;":"⋁","bigwedge;":"⋀","bkarow;":"⤍","blacklozenge;":"⧫","blacksquare;":"▪","blacktriangle;":"▴","blacktriangledown;":"▾","blacktriangleleft;":"◂","blacktriangleright;":"▸","blank;":"␣","blk12;":"▒","blk14;":"░","blk34;":"▓","block;":"█","bne;":"=⃥","bnequiv;":"≡⃥","bnot;":"⌐","bopf;":"𝕓","bot;":"⊥","bottom;":"⊥","bowtie;":"⋈","boxDL;":"╗","boxDR;":"╔","boxDl;":"╖","boxDr;":"╓","boxH;":"═","boxHD;":"╦","boxHU;":"╩","boxHd;":"╤","boxHu;":"╧","boxUL;":"╝","boxUR;":"╚","boxUl;":"╜","boxUr;":"╙","boxV;":"║","boxVH;":"╬","boxVL;":"╣","boxVR;":"╠","boxVh;":"╫","boxVl;":"╢","boxVr;":"╟","boxbox;":"⧉","boxdL;":"╕","boxdR;":"╒","boxdl;":"┐","boxdr;":"┌","boxh;":"─","boxhD;":"╥","boxhU;":"╨","boxhd;":"┬","boxhu;":"┴","boxminus;":"⊟","boxplus;":"⊞","boxtimes;":"⊠","boxuL;":"╛","boxuR;":"╘","boxul;":"┘","boxur;":"└","boxv;":"│","boxvH;":"╪","boxvL;":"╡","boxvR;":"╞","boxvh;":"┼","boxvl;":"┤","boxvr;":"├","bprime;":"‵","breve;":"˘","brvbar":"¦","brvbar;":"¦","bscr;":"𝒷","bsemi;":"⁏","bsim;":"∽","bsime;":"⋍","bsol;":"\\","bsolb;":"⧅","bsolhsub;":"⟈","bull;":"•","bullet;":"•","bump;":"≎","bumpE;":"⪮","bumpe;":"≏","bumpeq;":"≏","cacute;":"ć","cap;":"∩","capand;":"⩄","capbrcup;":"⩉","capcap;":"⩋","capcup;":"⩇",
"capdot;":"⩀","caps;":"∩︀","caret;":"⁁","caron;":"ˇ","ccaps;":"⩍","ccaron;":"č","ccedil":"ç","ccedil;":"ç","ccirc;":"ĉ","ccups;":"⩌","ccupssm;":"⩐","cdot;":"ċ","cedil":"¸","cedil;":"¸","cemptyv;":"⦲","cent":"¢","cent;":"¢","centerdot;":"·","cfr;":"𝔠","chcy;":"ч","check;":"✓","checkmark;":"✓","chi;":"χ","cir;":"○","cirE;":"⧃","circ;":"ˆ","circeq;":"≗","circlearrowleft;":"↺","circlearrowright;":"↻","circledR;":"®","circledS;":"Ⓢ","circledast;":"⊛","circledcirc;":"⊚","circleddash;":"⊝","cire;":"≗","cirfnint;":"⨐","cirmid;":"⫯","cirscir;":"⧂","clubs;":"♣","clubsuit;":"♣","colon;":":","colone;":"≔","coloneq;":"≔","comma;":",","commat;":"@","comp;":"∁","compfn;":"∘","complement;":"∁","complexes;":"ℂ","cong;":"≅","congdot;":"⩭","conint;":"∮","copf;":"𝕔","coprod;":"∐","copy":"©","copy;":"©","copysr;":"℗","crarr;":"↵","cross;":"✗","cscr;":"𝒸","csub;":"⫏","csube;":"⫑","csup;":"⫐","csupe;":"⫒","ctdot;":"⋯","cudarrl;":"⤸","cudarrr;":"⤵","cuepr;":"⋞","cuesc;":"⋟","cularr;":"↶","cularrp;":"⤽","cup;":"∪","cupbrcap;":"⩈","cupcap;":"⩆","cupcup;":"⩊","cupdot;":"⊍","cupor;":"⩅","cups;":"∪︀","curarr;":"↷","curarrm;":"⤼","curlyeqprec;":"⋞","curlyeqsucc;":"⋟","curlyvee;":"⋎","curlywedge;":"⋏","curren":"¤","curren;":"¤","curvearrowleft;":"↶","curvearrowright;":"↷","cuvee;":"⋎","cuwed;":"⋏","cwconint;":"∲","cwint;":"∱","cylcty;":"⌭","dArr;":"⇓","dHar;":"⥥","dagger;":"†","daleth;":"ℸ","darr;":"↓","dash;":"‐","dashv;":"⊣","dbkarow;":"⤏","dblac;":"˝","dcaron;":"ď","dcy;":"д","dd;":"ⅆ","ddagger;":"‡","ddarr;":"⇊","ddotseq;":"⩷","deg":"°","deg;":"°","delta;":"δ","demptyv;":"⦱","dfisht;":"⥿","dfr;":"𝔡","dharl;":"⇃","dharr;":"⇂","diam;":"⋄","diamond;":"⋄","diamondsuit;":"♦","diams;":"♦","die;":"¨","digamma;":"ϝ","disin;":"⋲","div;":"÷","divide":"÷","divide;":"÷","divideontimes;":"⋇","divonx;":"⋇","djcy;":"ђ","dlcorn;":"⌞","dlcrop;":"⌍","dollar;":"$","dopf;":"𝕕","dot;":"˙","doteq;":"≐","doteqdot;":"≑","dotminus;":"∸","dotplus;":"∔","dotsquare;":"⊡","doublebarwedge;":"⌆","downarrow;":"↓","downdowna
rrows;":"⇊","downharpoonleft;":"⇃","downharpoonright;":"⇂","drbkarow;":"⤐","drcorn;":"⌟","drcrop;":"⌌","dscr;":"𝒹","dscy;":"ѕ","dsol;":"⧶","dstrok;":"đ","dtdot;":"⋱","dtri;":"▿","dtrif;":"▾","duarr;":"⇵","duhar;":"⥯","dwangle;":"⦦","dzcy;":"џ","dzigrarr;":"⟿","eDDot;":"⩷","eDot;":"≑","eacute":"é","eacute;":"é","easter;":"⩮","ecaron;":"ě","ecir;":"≖","ecirc":"ê","ecirc;":"ê","ecolon;":"≕","ecy;":"э","edot;":"ė","ee;":"ⅇ","efDot;":"≒","efr;":"𝔢","eg;":"⪚","egrave":"è","egrave;":"è","egs;":"⪖","egsdot;":"⪘","el;":"⪙","elinters;":"⏧","ell;":"ℓ","els;":"⪕","elsdot;":"⪗","emacr;":"ē","empty;":"∅","emptyset;":"∅","emptyv;":"∅","emsp13;":" ","emsp14;":" ","emsp;":" ","eng;":"ŋ","ensp;":" ","eogon;":"ę","eopf;":"𝕖","epar;":"⋕","eparsl;":"⧣","eplus;":"⩱","epsi;":"ε","epsilon;":"ε","epsiv;":"ϵ","eqcirc;":"≖","eqcolon;":"≕","eqsim;":"≂","eqslantgtr;":"⪖","eqslantless;":"⪕","equals;":"=","equest;":"≟","equiv;":"≡","equivDD;":"⩸","eqvparsl;":"⧥","erDot;":"≓","erarr;":"⥱","escr;":"ℯ","esdot;":"≐","esim;":"≂","eta;":"η","eth":"ð","eth;":"ð","euml":"ë","euml;":"ë","euro;":"€","excl;":"!","exist;":"∃","expectation;":"ℰ","exponentiale;":"ⅇ","fallingdotseq;":"≒","fcy;":"ф","female;":"♀","ffilig;":"ffi","fflig;":"ff","ffllig;":"ffl","ffr;":"𝔣","filig;":"fi","fjlig;":"fj","flat;":"♭","fllig;":"fl","fltns;":"▱","fnof;":"ƒ","fopf;":"𝕗","forall;":"∀","fork;":"⋔","forkv;":"⫙","fpartint;":"⨍","frac12":"½","frac12;":"½","frac13;":"⅓","frac14":"¼","frac14;":"¼","frac15;":"⅕","frac16;":"⅙","frac18;":"⅛","frac23;":"⅔","frac25;":"⅖","frac34":"¾","frac34;":"¾","frac35;":"⅗","frac38;":"⅜","frac45;":"⅘","frac56;":"⅚","frac58;":"⅝","frac78;":"⅞","frasl;":"⁄","frown;":"⌢","fscr;":"𝒻","gE;":"≧","gEl;":"⪌","gacute;":"ǵ","gamma;":"γ","gammad;":"ϝ","gap;":"⪆","gbreve;":"ğ","gcirc;":"ĝ","gcy;":"г","gdot;":"ġ","ge;":"≥","gel;":"⋛","geq;":"≥","geqq;":"≧","geqslant;":"⩾","ges;":"⩾","gescc;":"⪩","gesdot;":"⪀","gesdoto;":"⪂","gesdotol;":"⪄","gesl;":"⋛︀","gesles;":"⪔","gfr;":"𝔤","gg;":"≫","ggg;":"⋙","gimel;":"ℷ",
"gjcy;":"ѓ","gl;":"≷","glE;":"⪒","gla;":"⪥","glj;":"⪤","gnE;":"≩","gnap;":"⪊","gnapprox;":"⪊","gne;":"⪈","gneq;":"⪈","gneqq;":"≩","gnsim;":"⋧","gopf;":"𝕘","grave;":"`","gscr;":"ℊ","gsim;":"≳","gsime;":"⪎","gsiml;":"⪐","gt":">","gt;":">","gtcc;":"⪧","gtcir;":"⩺","gtdot;":"⋗","gtlPar;":"⦕","gtquest;":"⩼","gtrapprox;":"⪆","gtrarr;":"⥸","gtrdot;":"⋗","gtreqless;":"⋛","gtreqqless;":"⪌","gtrless;":"≷","gtrsim;":"≳","gvertneqq;":"≩︀","gvnE;":"≩︀","hArr;":"⇔","hairsp;":" ","half;":"½","hamilt;":"ℋ","hardcy;":"ъ","harr;":"↔","harrcir;":"⥈","harrw;":"↭","hbar;":"ℏ","hcirc;":"ĥ","hearts;":"♥","heartsuit;":"♥","hellip;":"…","hercon;":"⊹","hfr;":"𝔥","hksearow;":"⤥","hkswarow;":"⤦","hoarr;":"⇿","homtht;":"∻","hookleftarrow;":"↩","hookrightarrow;":"↪","hopf;":"𝕙","horbar;":"―","hscr;":"𝒽","hslash;":"ℏ","hstrok;":"ħ","hybull;":"⁃","hyphen;":"‐","iacute":"í","iacute;":"í","ic;":"⁣","icirc":"î","icirc;":"î","icy;":"и","iecy;":"е","iexcl":"¡","iexcl;":"¡","iff;":"⇔","ifr;":"𝔦","igrave":"ì","igrave;":"ì","ii;":"ⅈ","iiiint;":"⨌","iiint;":"∭","iinfin;":"⧜","iiota;":"℩","ijlig;":"ij","imacr;":"ī","image;":"ℑ","imagline;":"ℐ","imagpart;":"ℑ","imath;":"ı","imof;":"⊷","imped;":"Ƶ","in;":"∈","incare;":"℅","infin;":"∞","infintie;":"⧝","inodot;":"ı","int;":"∫","intcal;":"⊺","integers;":"ℤ","intercal;":"⊺","intlarhk;":"⨗","intprod;":"⨼","iocy;":"ё","iogon;":"į","iopf;":"𝕚","iota;":"ι","iprod;":"⨼","iquest":"¿","iquest;":"¿","iscr;":"𝒾","isin;":"∈","isinE;":"⋹","isindot;":"⋵","isins;":"⋴","isinsv;":"⋳","isinv;":"∈","it;":"⁢","itilde;":"ĩ","iukcy;":"і","iuml":"ï","iuml;":"ï","jcirc;":"ĵ","jcy;":"й","jfr;":"𝔧","jmath;":"ȷ","jopf;":"𝕛","jscr;":"𝒿","jsercy;":"ј","jukcy;":"є","kappa;":"κ","kappav;":"ϰ","kcedil;":"ķ","kcy;":"к","kfr;":"𝔨","kgreen;":"ĸ","khcy;":"х","kjcy;":"ќ","kopf;":"𝕜","kscr;":"𝓀","lAarr;":"⇚","lArr;":"⇐","lAtail;":"⤛","lBarr;":"⤎","lE;":"≦","lEg;":"⪋","lHar;":"⥢","lacute;":"ĺ","laemptyv;":"⦴","lagran;":"ℒ","lambda;":"λ","lang;":"⟨","langd;":"⦑","langle;":"⟨","lap;":"⪅","laquo":"«","
laquo;":"«","larr;":"←","larrb;":"⇤","larrbfs;":"⤟","larrfs;":"⤝","larrhk;":"↩","larrlp;":"↫","larrpl;":"⤹","larrsim;":"⥳","larrtl;":"↢","lat;":"⪫","latail;":"⤙","late;":"⪭","lates;":"⪭︀","lbarr;":"⤌","lbbrk;":"❲","lbrace;":"{","lbrack;":"[","lbrke;":"⦋","lbrksld;":"⦏","lbrkslu;":"⦍","lcaron;":"ľ","lcedil;":"ļ","lceil;":"⌈","lcub;":"{","lcy;":"л","ldca;":"⤶","ldquo;":"“","ldquor;":"„","ldrdhar;":"⥧","ldrushar;":"⥋","ldsh;":"↲","le;":"≤","leftarrow;":"←","leftarrowtail;":"↢","leftharpoondown;":"↽","leftharpoonup;":"↼","leftleftarrows;":"⇇","leftrightarrow;":"↔","leftrightarrows;":"⇆","leftrightharpoons;":"⇋","leftrightsquigarrow;":"↭","leftthreetimes;":"⋋","leg;":"⋚","leq;":"≤","leqq;":"≦","leqslant;":"⩽","les;":"⩽","lescc;":"⪨","lesdot;":"⩿","lesdoto;":"⪁","lesdotor;":"⪃","lesg;":"⋚︀","lesges;":"⪓","lessapprox;":"⪅","lessdot;":"⋖","lesseqgtr;":"⋚","lesseqqgtr;":"⪋","lessgtr;":"≶","lesssim;":"≲","lfisht;":"⥼","lfloor;":"⌊","lfr;":"𝔩","lg;":"≶","lgE;":"⪑","lhard;":"↽","lharu;":"↼","lharul;":"⥪","lhblk;":"▄","ljcy;":"љ","ll;":"≪","llarr;":"⇇","llcorner;":"⌞","llhard;":"⥫","lltri;":"◺","lmidot;":"ŀ","lmoust;":"⎰","lmoustache;":"⎰","lnE;":"≨","lnap;":"⪉","lnapprox;":"⪉","lne;":"⪇","lneq;":"⪇","lneqq;":"≨","lnsim;":"⋦","loang;":"⟬","loarr;":"⇽","lobrk;":"⟦","longleftarrow;":"⟵","longleftrightarrow;":"⟷","longmapsto;":"⟼","longrightarrow;":"⟶","looparrowleft;":"↫","looparrowright;":"↬","lopar;":"⦅","lopf;":"𝕝","loplus;":"⨭","lotimes;":"⨴","lowast;":"∗","lowbar;":"_","loz;":"◊","lozenge;":"◊","lozf;":"⧫","lpar;":"(","lparlt;":"⦓","lrarr;":"⇆","lrcorner;":"⌟","lrhar;":"⇋","lrhard;":"⥭","lrm;":"‎","lrtri;":"⊿","lsaquo;":"‹","lscr;":"𝓁","lsh;":"↰","lsim;":"≲","lsime;":"⪍","lsimg;":"⪏","lsqb;":"[","lsquo;":"‘","lsquor;":"‚","lstrok;":"ł","lt":"<","lt;":"<","ltcc;":"⪦","ltcir;":"⩹","ltdot;":"⋖","lthree;":"⋋","ltimes;":"⋉","ltlarr;":"⥶","ltquest;":"⩻","ltrPar;":"⦖","ltri;":"◃","ltrie;":"⊴","ltrif;":"◂","lurdshar;":"⥊","luruhar;":"⥦","lvertneqq;":"≨︀","lvnE;":"≨︀","mDDot;":"∺","ma
cr":"¯","macr;":"¯","male;":"♂","malt;":"✠","maltese;":"✠","map;":"↦","mapsto;":"↦","mapstodown;":"↧","mapstoleft;":"↤","mapstoup;":"↥","marker;":"▮","mcomma;":"⨩","mcy;":"м","mdash;":"—","measuredangle;":"∡","mfr;":"𝔪","mho;":"℧","micro":"µ","micro;":"µ","mid;":"∣","midast;":"*","midcir;":"⫰","middot":"·","middot;":"·","minus;":"−","minusb;":"⊟","minusd;":"∸","minusdu;":"⨪","mlcp;":"⫛","mldr;":"…","mnplus;":"∓","models;":"⊧","mopf;":"𝕞","mp;":"∓","mscr;":"𝓂","mstpos;":"∾","mu;":"μ","multimap;":"⊸","mumap;":"⊸","nGg;":"⋙̸","nGt;":"≫⃒","nGtv;":"≫̸","nLeftarrow;":"⇍","nLeftrightarrow;":"⇎","nLl;":"⋘̸","nLt;":"≪⃒","nLtv;":"≪̸","nRightarrow;":"⇏","nVDash;":"⊯","nVdash;":"⊮","nabla;":"∇","nacute;":"ń","nang;":"∠⃒","nap;":"≉","napE;":"⩰̸","napid;":"≋̸","napos;":"ʼn","napprox;":"≉","natur;":"♮","natural;":"♮","naturals;":"ℕ","nbsp":" ","nbsp;":" ","nbump;":"≎̸","nbumpe;":"≏̸","ncap;":"⩃","ncaron;":"ň","ncedil;":"ņ","ncong;":"≇","ncongdot;":"⩭̸","ncup;":"⩂","ncy;":"н","ndash;":"–","ne;":"≠","neArr;":"⇗","nearhk;":"⤤","nearr;":"↗","nearrow;":"↗","nedot;":"≐̸","nequiv;":"≢","nesear;":"⤨","nesim;":"≂̸","nexist;":"∄","nexists;":"∄","nfr;":"𝔫","ngE;":"≧̸","nge;":"≱","ngeq;":"≱","ngeqq;":"≧̸","ngeqslant;":"⩾̸","nges;":"⩾̸","ngsim;":"≵","ngt;":"≯","ngtr;":"≯","nhArr;":"⇎","nharr;":"↮","nhpar;":"⫲","ni;":"∋","nis;":"⋼","nisd;":"⋺","niv;":"∋","njcy;":"њ","nlArr;":"⇍","nlE;":"≦̸","nlarr;":"↚","nldr;":"‥","nle;":"≰","nleftarrow;":"↚","nleftrightarrow;":"↮","nleq;":"≰","nleqq;":"≦̸","nleqslant;":"⩽̸","nles;":"⩽̸","nless;":"≮","nlsim;":"≴","nlt;":"≮","nltri;":"⋪","nltrie;":"⋬","nmid;":"∤","nopf;":"𝕟","not":"¬","not;":"¬","notin;":"∉","notinE;":"⋹̸","notindot;":"⋵̸","notinva;":"∉","notinvb;":"⋷","notinvc;":"⋶","notni;":"∌","notniva;":"∌","notnivb;":"⋾","notnivc;":"⋽","npar;":"∦","nparallel;":"∦","nparsl;":"⫽⃥","npart;":"∂̸","npolint;":"⨔","npr;":"⊀","nprcue;":"⋠","npre;":"⪯̸","nprec;":"⊀","npreceq;":"⪯̸","nrArr;":"⇏","nrarr;":"↛","nrarrc;":"⤳̸","nrarrw;":"↝̸","nrightarrow;":"↛","nrtri;":
"⋫","nrtrie;":"⋭","nsc;":"⊁","nsccue;":"⋡","nsce;":"⪰̸","nscr;":"𝓃","nshortmid;":"∤","nshortparallel;":"∦","nsim;":"≁","nsime;":"≄","nsimeq;":"≄","nsmid;":"∤","nspar;":"∦","nsqsube;":"⋢","nsqsupe;":"⋣","nsub;":"⊄","nsubE;":"⫅̸","nsube;":"⊈","nsubset;":"⊂⃒","nsubseteq;":"⊈","nsubseteqq;":"⫅̸","nsucc;":"⊁","nsucceq;":"⪰̸","nsup;":"⊅","nsupE;":"⫆̸","nsupe;":"⊉","nsupset;":"⊃⃒","nsupseteq;":"⊉","nsupseteqq;":"⫆̸","ntgl;":"≹","ntilde":"ñ","ntilde;":"ñ","ntlg;":"≸","ntriangleleft;":"⋪","ntrianglelefteq;":"⋬","ntriangleright;":"⋫","ntrianglerighteq;":"⋭","nu;":"ν","num;":"#","numero;":"№","numsp;":" ","nvDash;":"⊭","nvHarr;":"⤄","nvap;":"≍⃒","nvdash;":"⊬","nvge;":"≥⃒","nvgt;":">⃒","nvinfin;":"⧞","nvlArr;":"⤂","nvle;":"≤⃒","nvlt;":"<⃒","nvltrie;":"⊴⃒","nvrArr;":"⤃","nvrtrie;":"⊵⃒","nvsim;":"∼⃒","nwArr;":"⇖","nwarhk;":"⤣","nwarr;":"↖","nwarrow;":"↖","nwnear;":"⤧","oS;":"Ⓢ","oacute":"ó","oacute;":"ó","oast;":"⊛","ocir;":"⊚","ocirc":"ô","ocirc;":"ô","ocy;":"о","odash;":"⊝","odblac;":"ő","odiv;":"⨸","odot;":"⊙","odsold;":"⦼","oelig;":"œ","ofcir;":"⦿","ofr;":"𝔬","ogon;":"˛","ograve":"ò","ograve;":"ò","ogt;":"⧁","ohbar;":"⦵","ohm;":"Ω","oint;":"∮","olarr;":"↺","olcir;":"⦾","olcross;":"⦻","oline;":"‾","olt;":"⧀","omacr;":"ō","omega;":"ω","omicron;":"ο","omid;":"⦶","ominus;":"⊖","oopf;":"𝕠","opar;":"⦷","operp;":"⦹","oplus;":"⊕","or;":"∨","orarr;":"↻","ord;":"⩝","order;":"ℴ","orderof;":"ℴ","ordf":"ª","ordf;":"ª","ordm":"º","ordm;":"º","origof;":"⊶","oror;":"⩖","orslope;":"⩗","orv;":"⩛","oscr;":"ℴ","oslash":"ø","oslash;":"ø","osol;":"⊘","otilde":"õ","otilde;":"õ","otimes;":"⊗","otimesas;":"⨶","ouml":"ö","ouml;":"ö","ovbar;":"⌽","par;":"∥","para":"¶","para;":"¶","parallel;":"∥","parsim;":"⫳","parsl;":"⫽","part;":"∂","pcy;":"п","percnt;":"%","period;":".","permil;":"‰","perp;":"⊥","pertenk;":"‱","pfr;":"𝔭","phi;":"φ","phiv;":"ϕ","phmmat;":"ℳ","phone;":"☎","pi;":"π","pitchfork;":"⋔","piv;":"ϖ","planck;":"ℏ","planckh;":"ℎ","plankv;":"ℏ","plus;":"+","plusacir;":"⨣","plusb;":"⊞","pluscir;"
:"⨢","plusdo;":"∔","plusdu;":"⨥","pluse;":"⩲","plusmn":"±","plusmn;":"±","plussim;":"⨦","plustwo;":"⨧","pm;":"±","pointint;":"⨕","popf;":"𝕡","pound":"£","pound;":"£","pr;":"≺","prE;":"⪳","prap;":"⪷","prcue;":"≼","pre;":"⪯","prec;":"≺","precapprox;":"⪷","preccurlyeq;":"≼","preceq;":"⪯","precnapprox;":"⪹","precneqq;":"⪵","precnsim;":"⋨","precsim;":"≾","prime;":"′","primes;":"ℙ","prnE;":"⪵","prnap;":"⪹","prnsim;":"⋨","prod;":"∏","profalar;":"⌮","profline;":"⌒","profsurf;":"⌓","prop;":"∝","propto;":"∝","prsim;":"≾","prurel;":"⊰","pscr;":"𝓅","psi;":"ψ","puncsp;":" ","qfr;":"𝔮","qint;":"⨌","qopf;":"𝕢","qprime;":"⁗","qscr;":"𝓆","quaternions;":"ℍ","quatint;":"⨖","quest;":"?","questeq;":"≟","quot":"\"","quot;":"\"","rAarr;":"⇛","rArr;":"⇒","rAtail;":"⤜","rBarr;":"⤏","rHar;":"⥤","race;":"∽̱","racute;":"ŕ","radic;":"√","raemptyv;":"⦳","rang;":"⟩","rangd;":"⦒","range;":"⦥","rangle;":"⟩","raquo":"»","raquo;":"»","rarr;":"→","rarrap;":"⥵","rarrb;":"⇥","rarrbfs;":"⤠","rarrc;":"⤳","rarrfs;":"⤞","rarrhk;":"↪","rarrlp;":"↬","rarrpl;":"⥅","rarrsim;":"⥴","rarrtl;":"↣","rarrw;":"↝","ratail;":"⤚","ratio;":"∶","rationals;":"ℚ","rbarr;":"⤍","rbbrk;":"❳","rbrace;":"}","rbrack;":"]","rbrke;":"⦌","rbrksld;":"⦎","rbrkslu;":"⦐","rcaron;":"ř","rcedil;":"ŗ","rceil;":"⌉","rcub;":"}","rcy;":"р","rdca;":"⤷","rdldhar;":"⥩","rdquo;":"”","rdquor;":"”","rdsh;":"↳","real;":"ℜ","realine;":"ℛ","realpart;":"ℜ","reals;":"ℝ","rect;":"▭","reg":"®","reg;":"®","rfisht;":"⥽","rfloor;":"⌋","rfr;":"𝔯","rhard;":"⇁","rharu;":"⇀","rharul;":"⥬","rho;":"ρ","rhov;":"ϱ","rightarrow;":"→","rightarrowtail;":"↣","rightharpoondown;":"⇁","rightharpoonup;":"⇀","rightleftarrows;":"⇄","rightleftharpoons;":"⇌","rightrightarrows;":"⇉","rightsquigarrow;":"↝","rightthreetimes;":"⋌","ring;":"˚","risingdotseq;":"≓","rlarr;":"⇄","rlhar;":"⇌","rlm;":"‏","rmoust;":"⎱","rmoustache;":"⎱","rnmid;":"⫮","roang;":"⟭","roarr;":"⇾","robrk;":"⟧","ropar;":"⦆","ropf;":"𝕣","roplus;":"⨮","rotimes;":"⨵","rpar;":")","rpargt;":"⦔","rppolint;":"⨒","rrarr;
":"⇉","rsaquo;":"›","rscr;":"𝓇","rsh;":"↱","rsqb;":"]","rsquo;":"’","rsquor;":"’","rthree;":"⋌","rtimes;":"⋊","rtri;":"▹","rtrie;":"⊵","rtrif;":"▸","rtriltri;":"⧎","ruluhar;":"⥨","rx;":"℞","sacute;":"ś","sbquo;":"‚","sc;":"≻","scE;":"⪴","scap;":"⪸","scaron;":"š","sccue;":"≽","sce;":"⪰","scedil;":"ş","scirc;":"ŝ","scnE;":"⪶","scnap;":"⪺","scnsim;":"⋩","scpolint;":"⨓","scsim;":"≿","scy;":"с","sdot;":"⋅","sdotb;":"⊡","sdote;":"⩦","seArr;":"⇘","searhk;":"⤥","searr;":"↘","searrow;":"↘","sect":"§","sect;":"§","semi;":";","seswar;":"⤩","setminus;":"∖","setmn;":"∖","sext;":"✶","sfr;":"𝔰","sfrown;":"⌢","sharp;":"♯","shchcy;":"щ","shcy;":"ш","shortmid;":"∣","shortparallel;":"∥","shy":"­","shy;":"­","sigma;":"σ","sigmaf;":"ς","sigmav;":"ς","sim;":"∼","simdot;":"⩪","sime;":"≃","simeq;":"≃","simg;":"⪞","simgE;":"⪠","siml;":"⪝","simlE;":"⪟","simne;":"≆","simplus;":"⨤","simrarr;":"⥲","slarr;":"←","smallsetminus;":"∖","smashp;":"⨳","smeparsl;":"⧤","smid;":"∣","smile;":"⌣","smt;":"⪪","smte;":"⪬","smtes;":"⪬︀","softcy;":"ь","sol;":"/","solb;":"⧄","solbar;":"⌿","sopf;":"𝕤","spades;":"♠","spadesuit;":"♠","spar;":"∥","sqcap;":"⊓","sqcaps;":"⊓︀","sqcup;":"⊔","sqcups;":"⊔︀","sqsub;":"⊏","sqsube;":"⊑","sqsubset;":"⊏","sqsubseteq;":"⊑","sqsup;":"⊐","sqsupe;":"⊒","sqsupset;":"⊐","sqsupseteq;":"⊒","squ;":"□","square;":"□","squarf;":"▪","squf;":"▪","srarr;":"→","sscr;":"𝓈","ssetmn;":"∖","ssmile;":"⌣","sstarf;":"⋆","star;":"☆","starf;":"★","straightepsilon;":"ϵ","straightphi;":"ϕ","strns;":"¯","sub;":"⊂","subE;":"⫅","subdot;":"⪽","sube;":"⊆","subedot;":"⫃","submult;":"⫁","subnE;":"⫋","subne;":"⊊","subplus;":"⪿","subrarr;":"⥹","subset;":"⊂","subseteq;":"⊆","subseteqq;":"⫅","subsetneq;":"⊊","subsetneqq;":"⫋","subsim;":"⫇","subsub;":"⫕","subsup;":"⫓","succ;":"≻","succapprox;":"⪸","succcurlyeq;":"≽","succeq;":"⪰","succnapprox;":"⪺","succneqq;":"⪶","succnsim;":"⋩","succsim;":"≿","sum;":"∑","sung;":"♪","sup1":"¹","sup1;":"¹","sup2":"²","sup2;":"²","sup3":"³","sup3;":"³","sup;":"⊃","supE;":"⫆","supdot
;":"⪾","supdsub;":"⫘","supe;":"⊇","supedot;":"⫄","suphsol;":"⟉","suphsub;":"⫗","suplarr;":"⥻","supmult;":"⫂","supnE;":"⫌","supne;":"⊋","supplus;":"⫀","supset;":"⊃","supseteq;":"⊇","supseteqq;":"⫆","supsetneq;":"⊋","supsetneqq;":"⫌","supsim;":"⫈","supsub;":"⫔","supsup;":"⫖","swArr;":"⇙","swarhk;":"⤦","swarr;":"↙","swarrow;":"↙","swnwar;":"⤪","szlig":"ß","szlig;":"ß","target;":"⌖","tau;":"τ","tbrk;":"⎴","tcaron;":"ť","tcedil;":"ţ","tcy;":"т","tdot;":"⃛","telrec;":"⌕","tfr;":"𝔱","there4;":"∴","therefore;":"∴","theta;":"θ","thetasym;":"ϑ","thetav;":"ϑ","thickapprox;":"≈","thicksim;":"∼","thinsp;":" ","thkap;":"≈","thksim;":"∼","thorn":"þ","thorn;":"þ","tilde;":"˜","times":"×","times;":"×","timesb;":"⊠","timesbar;":"⨱","timesd;":"⨰","tint;":"∭","toea;":"⤨","top;":"⊤","topbot;":"⌶","topcir;":"⫱","topf;":"𝕥","topfork;":"⫚","tosa;":"⤩","tprime;":"‴","trade;":"™","triangle;":"▵","triangledown;":"▿","triangleleft;":"◃","trianglelefteq;":"⊴","triangleq;":"≜","triangleright;":"▹","trianglerighteq;":"⊵","tridot;":"◬","trie;":"≜","triminus;":"⨺","triplus;":"⨹","trisb;":"⧍","tritime;":"⨻","trpezium;":"⏢","tscr;":"𝓉","tscy;":"ц","tshcy;":"ћ","tstrok;":"ŧ","twixt;":"≬","twoheadleftarrow;":"↞","twoheadrightarrow;":"↠","uArr;":"⇑","uHar;":"⥣","uacute":"ú","uacute;":"ú","uarr;":"↑","ubrcy;":"ў","ubreve;":"ŭ","ucirc":"û","ucirc;":"û","ucy;":"у","udarr;":"⇅","udblac;":"ű","udhar;":"⥮","ufisht;":"⥾","ufr;":"𝔲","ugrave":"ù","ugrave;":"ù","uharl;":"↿","uharr;":"↾","uhblk;":"▀","ulcorn;":"⌜","ulcorner;":"⌜","ulcrop;":"⌏","ultri;":"◸","umacr;":"ū","uml":"¨","uml;":"¨","uogon;":"ų","uopf;":"𝕦","uparrow;":"↑","updownarrow;":"↕","upharpoonleft;":"↿","upharpoonright;":"↾","uplus;":"⊎","upsi;":"υ","upsih;":"ϒ","upsilon;":"υ","upuparrows;":"⇈","urcorn;":"⌝","urcorner;":"⌝","urcrop;":"⌎","uring;":"ů","urtri;":"◹","uscr;":"𝓊","utdot;":"⋰","utilde;":"ũ","utri;":"▵","utrif;":"▴","uuarr;":"⇈","uuml":"ü","uuml;":"ü","uwangle;":"⦧","vArr;":"⇕","vBar;":"⫨","vBarv;":"⫩","vDash;":"⊨","vangrt;":"⦜","varepsilo
n;":"ϵ","varkappa;":"ϰ","varnothing;":"∅","varphi;":"ϕ","varpi;":"ϖ","varpropto;":"∝","varr;":"↕","varrho;":"ϱ","varsigma;":"ς","varsubsetneq;":"⊊︀","varsubsetneqq;":"⫋︀","varsupsetneq;":"⊋︀","varsupsetneqq;":"⫌︀","vartheta;":"ϑ","vartriangleleft;":"⊲","vartriangleright;":"⊳","vcy;":"в","vdash;":"⊢","vee;":"∨","veebar;":"⊻","veeeq;":"≚","vellip;":"⋮","verbar;":"|","vert;":"|","vfr;":"𝔳","vltri;":"⊲","vnsub;":"⊂⃒","vnsup;":"⊃⃒","vopf;":"𝕧","vprop;":"∝","vrtri;":"⊳","vscr;":"𝓋","vsubnE;":"⫋︀","vsubne;":"⊊︀","vsupnE;":"⫌︀","vsupne;":"⊋︀","vzigzag;":"⦚","wcirc;":"ŵ","wedbar;":"⩟","wedge;":"∧","wedgeq;":"≙","weierp;":"℘","wfr;":"𝔴","wopf;":"𝕨","wp;":"℘","wr;":"≀","wreath;":"≀","wscr;":"𝓌","xcap;":"⋂","xcirc;":"◯","xcup;":"⋃","xdtri;":"▽","xfr;":"𝔵","xhArr;":"⟺","xharr;":"⟷","xi;":"ξ","xlArr;":"⟸","xlarr;":"⟵","xmap;":"⟼","xnis;":"⋻","xodot;":"⨀","xopf;":"𝕩","xoplus;":"⨁","xotime;":"⨂","xrArr;":"⟹","xrarr;":"⟶","xscr;":"𝓍","xsqcup;":"⨆","xuplus;":"⨄","xutri;":"△","xvee;":"⋁","xwedge;":"⋀","yacute":"ý","yacute;":"ý","yacy;":"я","ycirc;":"ŷ","ycy;":"ы","yen":"¥","yen;":"¥","yfr;":"𝔶","yicy;":"ї","yopf;":"𝕪","yscr;":"𝓎","yucy;":"ю","yuml":"ÿ","yuml;":"ÿ","zacute;":"ź","zcaron;":"ž","zcy;":"з","zdot;":"ż","zeetrf;":"ℨ","zeta;":"ζ","zfr;":"𝔷","zhcy;":"ж","zigrarr;":"⇝","zopf;":"𝕫","zscr;":"𝓏","zwj;":"‍","zwnj;":"‌"})));
26
+ // #endregion
27
+
28
+ const STATE_DATA = 0;
29
+ const STATE_TAG_OPEN = 1;
30
+ const STATE_END_TAG_OPEN = 2;
31
+ const STATE_TAG_NAME = 3;
32
+ const STATE_BEFORE_ATTRIBUTE_NAME = 4;
33
+ const STATE_ATTRIBUTE_NAME = 5;
34
+ const STATE_AFTER_ATTRIBUTE_NAME = 6;
35
+ const STATE_BEFORE_ATTRIBUTE_VALUE = 7;
36
+ const STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED = 8;
37
+ const STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED = 9;
38
+ const STATE_ATTRIBUTE_VALUE_UNQUOTED = 10;
39
+ const STATE_AFTER_ATTRIBUTE_VALUE_QUOTED = 11;
40
+ const STATE_SELF_CLOSING_START_TAG = 12;
41
+
42
+ const STATE_MARKUP_DECLARATION_OPEN = 13;
43
+ const STATE_COMMENT_START = 14;
44
+ const STATE_COMMENT_START_DASH = 15;
45
+ const STATE_COMMENT = 16;
46
+ const STATE_COMMENT_END_DASH = 17;
47
+ const STATE_COMMENT_END = 18;
48
+ const STATE_COMMENT_END_BANG = 19;
49
+ const STATE_BOGUS_COMMENT = 20;
50
+
51
+ const STATE_COMMENT_LESS_THAN_SIGN = 21;
52
+ const STATE_COMMENT_LESS_THAN_SIGN_BANG = 22;
53
+ const STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH = 23;
54
+ const STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH = 24;
55
+
56
+ const STATE_DOCTYPE = 25;
57
+ const STATE_BEFORE_DOCTYPE_NAME = 26;
58
+ const STATE_DOCTYPE_NAME = 27;
59
+ const STATE_AFTER_DOCTYPE_NAME = 28;
60
+ const STATE_AFTER_DOCTYPE_PUBLIC_KEYWORD = 29;
61
+ const STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 30;
62
+ const STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 31;
63
+ const STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 32;
64
+ const STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 33;
65
+ const STATE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 34;
66
+ const STATE_AFTER_DOCTYPE_SYSTEM_KEYWORD = 35;
67
+ const STATE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 36;
68
+ const STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 37;
69
+ const STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 38;
70
+ const STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 39;
71
+ const STATE_BOGUS_DOCTYPE = 40;
72
+
73
+ const STATE_CDATA_SECTION = 41;
74
+ const STATE_CDATA_SECTION_BRACKET = 42;
75
+ const STATE_CDATA_SECTION_END = 43;
76
+
77
+ const STATE_RCDATA = 44;
78
+ const STATE_RCDATA_LESS_THAN_SIGN = 45;
79
+ const STATE_RCDATA_END_TAG_OPEN = 46;
80
+ const STATE_RCDATA_END_TAG_NAME = 47;
81
+
82
+ const STATE_RAWTEXT = 48;
83
+ const STATE_RAWTEXT_LESS_THAN_SIGN = 49;
84
+ const STATE_RAWTEXT_END_TAG_OPEN = 50;
85
+ const STATE_RAWTEXT_END_TAG_NAME = 51;
86
+
87
+ const STATE_SCRIPT_DATA = 52;
88
+ const STATE_SCRIPT_DATA_LESS_THAN_SIGN = 53;
89
+ const STATE_SCRIPT_DATA_END_TAG_OPEN = 54;
90
+ const STATE_SCRIPT_DATA_END_TAG_NAME = 55;
91
+ const STATE_SCRIPT_DATA_ESCAPE_START = 56;
92
+ const STATE_SCRIPT_DATA_ESCAPE_START_DASH = 57;
93
+ const STATE_SCRIPT_DATA_ESCAPED = 58;
94
+ const STATE_SCRIPT_DATA_ESCAPED_DASH = 59;
95
+ const STATE_SCRIPT_DATA_ESCAPED_DASH_DASH = 60;
96
+ const STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 61;
97
+ const STATE_SCRIPT_DATA_ESCAPED_END_TAG_OPEN = 62;
98
+ const STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME = 63;
99
+ const STATE_SCRIPT_DATA_DOUBLE_ESCAPE_START = 64;
100
+ const STATE_SCRIPT_DATA_DOUBLE_ESCAPED = 65;
101
+ const STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 66;
102
+ const STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 67;
103
+ const STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 68;
104
+ const STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END = 69;
105
+
106
+ const STATE_PLAINTEXT = 70;
107
+
108
+ // https://html.spec.whatwg.org/multipage/parsing.html#character-reference-state
109
+ const STATE_CHARACTER_REFERENCE = 71;
110
+ // https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
111
+ const STATE_NAMED_CHARACTER_REFERENCE = 72;
112
+ // https://html.spec.whatwg.org/multipage/parsing.html#ambiguous-ampersand-state
113
+ const STATE_AMBIGUOUS_AMPERSAND = 73;
114
+ // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-state
115
+ const STATE_NUMERIC_CHARACTER_REFERENCE = 74;
116
+ // https://html.spec.whatwg.org/multipage/parsing.html#hexadecimal-character-reference-start-state
117
+ const STATE_HEXADECIMAL_CHARACTER_REFERENCE_START = 75;
118
+ // https://html.spec.whatwg.org/multipage/parsing.html#decimal-character-reference-start-state
119
+ const STATE_DECIMAL_CHARACTER_REFERENCE_START = 76;
120
+ // https://html.spec.whatwg.org/multipage/parsing.html#hexadecimal-character-reference-state
121
+ const STATE_HEXADECIMAL_CHARACTER_REFERENCE = 77;
122
+ // https://html.spec.whatwg.org/multipage/parsing.html#decimal-character-reference-state
123
+ const STATE_DECIMAL_CHARACTER_REFERENCE = 78;
124
+ // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
125
+ const STATE_NUMERIC_CHARACTER_REFERENCE_END = 79;
126
+
127
+ const CC_TAB = 0x09;
128
+ const CC_LF = 0x0a;
129
+ const CC_FF = 0x0c;
130
+ const CC_SPACE = 0x20;
131
+ const CC_EXCLAMATION_MARK = 0x21;
132
+ const CC_QUOTATION_MARK = 0x22;
133
+ const CC_NUMBER_SIGN = 0x23;
134
+ const CC_AMPERSAND = 0x26;
135
+ const CC_APOSTROPHE = 0x27;
136
+ const CC_HYPHEN_MINUS = 0x2d;
137
+ const CC_SOLIDUS = 0x2f;
138
+ const CC_SEMICOLON = 0x3b;
139
+ const CC_LESS_THAN = 0x3c;
140
+ const CC_EQUALS = 0x3d;
141
+ const CC_GREATER_THAN = 0x3e;
142
+ const CC_QUESTION_MARK = 0x3f;
143
+ const CC_LEFT_SQUARE_BRACKET = 0x5b;
144
+ const CC_RIGHT_SQUARE_BRACKET = 0x5d;
145
+
146
+ const QUOTE_DOUBLE = 1;
147
+ const QUOTE_SINGLE = 2;
148
+ const QUOTE_NONE = 0;
149
+
150
+ // Longest WHATWG named entity name *including* the trailing `;` is 32 chars
151
+ // (`CounterClockwiseContourIntegral;`); without the trailing `;` it's 31.
152
+ // Used to cap both the tokenizer's named-character-reference run length and
153
+ // the decoder's longest-prefix backtrack so pathological inputs (e.g. `&`
154
+ // followed by thousands of alphanumerics) stay linear-time.
155
+ const MAX_ENTITY_NAME_LEN = 32;
156
+
157
/**
 * Tests whether a character code is an ASCII letter, i.e. lies in
 * `A`-`Z` (0x41-0x5a) or `a`-`z` (0x61-0x7a).
 * @param {number} cc character code
 * @returns {boolean} true when `cc` is an ASCII letter
 */
const isAsciiAlpha = (cc) => {
	// Lowercase range first; anything at or above `a` can only match there.
	if (cc >= 0x61) return cc <= 0x7a;
	return cc >= 0x41 && cc <= 0x5a;
};
163
+
164
/**
 * Tests whether a character code is an ASCII letter or an ASCII decimal
 * digit (`0`-`9`, 0x30-0x39).
 * @param {number} cc character code
 * @returns {boolean} true when `cc` is an ASCII letter or digit
 */
const isAsciiAlphanumeric = (cc) => {
	const isDigit = cc >= 0x30 && cc <= 0x39;
	return isDigit || isAsciiAlpha(cc);
};
170
+
171
/**
 * Tests whether a character code is an ASCII decimal digit (`0`-`9`).
 * @param {number} cc character code
 * @returns {boolean} true when `cc` lies in 0x30-0x39
 */
const isAsciiDigit = (cc) => {
	if (cc < 0x30) return false;
	return cc <= 0x39;
};
176
+
177
/**
 * Tests whether a character code is an ASCII hexadecimal digit:
 * `0`-`9`, `A`-`F` or `a`-`f`.
 * @param {number} cc character code
 * @returns {boolean} true when `cc` is an ASCII hex digit
 */
const isAsciiHexDigit = (cc) => {
	// 0-9
	if (cc >= 0x30 && cc <= 0x39) return true;
	// A-F
	if (cc >= 0x41 && cc <= 0x46) return true;
	// a-f
	return cc >= 0x61 && cc <= 0x66;
};
185
+
186
/**
 * Tests whether a character code is whitespace for tokenization purposes:
 * TAB, LF, FF, CR or SPACE.
 *
 * The WHATWG tokenizer states only list TAB/LF/FF/SPACE because the spec
 * normalizes newlines (CRLF and lone CR become LF) before tokenization.
 * This helper is applied to raw `input.charCodeAt(pos)` values, so CR is
 * accepted here as well; otherwise CRLF input would leak `\r` into tag
 * names, attribute names and unquoted attribute values.
 * NOTE(review): assumes callers do not normalize newlines beforehand —
 * if they do, the extra CR check is simply never hit.
 * @param {number} cc character code
 * @returns {boolean} is space
 */
const isSpace = (cc) =>
	cc === CC_TAB ||
	cc === CC_LF ||
	cc === CC_FF ||
	cc === 0x0d /* U+000D CARRIAGE RETURN */ ||
	cc === CC_SPACE;
192
+
193
+ /**
194
+ * Severity of a tokenizer-detected parse error. `"warning"` is recoverable
195
+ * (the tokenizer continued and the emitted token is still well-formed, e.g.
196
+ * missing-attribute-value); `"error"` means the emitted token's offset
197
+ * range is incomplete or does not match what the spec would produce, e.g.
198
+ * eof-in-tag.
199
+ *
200
+ * Token offsets are JS string indices (UTF-16 code-unit offsets into
201
+ * `input`), not byte offsets — relevant for inputs containing non-BMP
202
+ * code points where one code point spans two indices.
203
+ * @typedef {"warning" | "error"} ParseErrorSeverity
204
+ */
205
+
206
+ /**
207
+ * @typedef {object} HtmlTokenCallbacks
208
+ * @property {(input: string, start: number, end: number, nameStart: number, nameEnd: number, selfClosing: boolean) => number=} openTag
209
+ * @property {(input: string, start: number, end: number, nameStart: number, nameEnd: number) => number=} closeTag
210
+ * @property {(input: string, start: number, end: number) => number=} text
211
+ * @property {(input: string, nameStart: number, nameEnd: number, valueStart: number, valueEnd: number, quoteType: number) => number=} attribute
212
+ * @property {(input: string, start: number, end: number) => number=} comment
213
+ * @property {(input: string, start: number, end: number) => number=} doctype
214
+ * @property {(input: string, code: string, start: number, end: number, severity: ParseErrorSeverity) => void=} parseError
215
+ */
216
+
217
+ /**
218
+ * @param {string} input input string
219
+ * @param {number} pos current position
220
+ * @param {HtmlTokenCallbacks} callbacks callbacks
221
+ * @returns {number} final position
222
+ */
223
+ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
224
+ const len = input.length;
225
+ let state = STATE_DATA;
226
+ let returnState = STATE_DATA;
227
+
228
+ let textStart = pos;
229
+ let tagStart = pos;
230
+ let tagNameStart = -1;
231
+ let tagNameEnd = -1;
232
+ let attrNameStart = -1;
233
+ let attrNameEnd = -1;
234
+ let attrValueStart = -1;
235
+ let attrQuoteType = QUOTE_NONE;
236
+ let commentStart = pos;
237
+ let lastOpenTagName = "";
238
+ // Counter used by SCRIPT_DATA_DOUBLE_ESCAPE_{START,END} to detect whether
239
+ // the ASCII-alpha run after `<` / `</` spells exactly `"script"`. Values
240
+ // 0..6 = number of chars matched so far; 7 = no longer matches (sentinel).
241
+ // Avoids growing a buffer for pathological inputs with long alpha runs.
242
+ let scriptMatch = 0;
243
+ let namedEntityConsumed = 0;
244
+ // Tracks whether the current tag has parsed any attributes — used to
245
+ // fire the `end-tag-with-attributes` parse error when an end tag emits.
246
+ let tagHasAttributes = false;
247
+
248
/**
 * Forwards a tokenizer parse error to `callbacks.parseError`, passing the
 * full `input` string ahead of the supplied arguments. Does nothing when
 * the consumer did not register a `parseError` callback. Offsets are JS
 * string indices (UTF-16 code-unit offsets into `input`).
 * @param {string} code WHATWG parse-error code (kebab-case)
 * @param {number} start string offset where the error starts
 * @param {number} end string offset where the error ends
 * @param {ParseErrorSeverity} severity error severity
 */
const reportError = (code, start, end, severity) => {
	if (callbacks.parseError === undefined) return;
	callbacks.parseError(input, code, start, end, severity);
};
264
+
265
/**
 * Tests whether a character code is a lowercase ASCII letter (`a`-`z`).
 * @param {number} cc character code
 * @returns {boolean} true when `cc` lies in 0x61-0x7a
 */
const isAsciiLowerAlpha = (cc) => {
	if (cc < 0x61) return false;
	return cc <= 0x7a;
};
270
+
271
/**
 * Tests whether a character code is an uppercase ASCII letter (`A`-`Z`).
 * @param {number} cc character code
 * @returns {boolean} true when `cc` lies in 0x41-0x5a
 */
const isAsciiUpperAlpha = (cc) => {
	if (cc < 0x41) return false;
	return cc <= 0x5a;
};
276
+
277
/**
 * Maps a tag name to the tokenizer state used for that element's content.
 * The comparison is exact, so the caller must pass the name lowercased.
 * @param {string} name tag name (lowercase)
 * @returns {number} content mode state for this tag, or STATE_DATA
 */
const getContentModeForTag = (name) => {
	if (name === "textarea" || name === "title") {
		return STATE_RCDATA;
	}
	if (
		name === "style" ||
		name === "xmp" ||
		name === "iframe" ||
		name === "noembed" ||
		name === "noframes"
	) {
		return STATE_RAWTEXT;
	}
	if (name === "script") {
		return STATE_SCRIPT_DATA;
	}
	if (name === "plaintext") {
		return STATE_PLAINTEXT;
	}
	// Every other element keeps ordinary data tokenization.
	return STATE_DATA;
};
300
+
301
/**
 * Emits the pending text run `[textStart, endPos)` through
 * `callbacks.text` (when registered) and moves `textStart` up to `endPos`.
 * An empty or inverted range is ignored entirely.
 * @param {number} endPos end position
 */
const flushText = (endPos) => {
	if (!(textStart < endPos)) return;
	if (callbacks.text !== undefined) {
		callbacks.text(input, textStart, endPos);
	}
	// Moving `textStart` forward makes a repeated flush of the same span
	// (e.g. from the EOF handler after a tag-open transition already
	// flushed it) a no-op instead of a duplicate emit. emitOpenTag and
	// emitCloseTag assign `textStart` themselves afterwards, so their
	// start positions are unaffected.
	textStart = endPos;
};
317
+
318
/**
 * Reports the attribute currently being collected (if any) through
 * `callbacks.attribute`, then clears the per-attribute bookkeeping
 * (`attrNameStart`, `attrValueStart`, `attrQuoteType`).
 *
 * The value end passed to the callback is `endPos`, or -1 when no value
 * was recorded (`attrValueStart === -1`). `tagHasAttributes` is set
 * whenever an attribute name was pending, with or without a callback.
 * @param {number} endPos end position
 * @returns {number} next position
 */
const emitAttribute = (endPos) => {
	const hasPendingName = attrNameStart !== -1;
	let nextPos;
	if (callbacks.attribute !== undefined && hasPendingName) {
		// The callback's return value is used as the next position; it is
		// expected to skip the closing quote itself based on `quoteType`.
		const valueEnd = attrValueStart === -1 ? -1 : endPos;
		nextPos = callbacks.attribute(
			input,
			attrNameStart,
			attrNameEnd,
			attrValueStart,
			valueEnd,
			attrQuoteType
		);
	} else if (attrQuoteType === QUOTE_NONE) {
		nextPos = endPos;
	} else {
		// Default advance steps past the closing quote so the state machine
		// can continue without an `attribute` callback.
		nextPos = endPos + 1;
	}
	if (hasPendingName) tagHasAttributes = true;
	attrNameStart = -1;
	attrValueStart = -1;
	attrQuoteType = QUOTE_NONE;
	return nextPos;
};
345
+
346
/**
 * Emits the current start tag through `callbacks.openTag` (when
 * registered) and resets per-tag state. Unless the tag is self-closing,
 * its lowercased name is stored in `lastOpenTagName`. `textStart` is set
 * to the returned position.
 * @param {number} endPos end position
 * @param {boolean} selfClosing is self closing
 * @returns {number} next position
 */
const emitOpenTag = (endPos, selfClosing) => {
	// Without a callback the tokenizer simply continues at `endPos`.
	const nextPos =
		callbacks.openTag === undefined
			? endPos
			: callbacks.openTag(
					input,
					tagStart,
					endPos,
					tagNameStart,
					tagNameEnd,
					selfClosing
				);
	if (!selfClosing) {
		lastOpenTagName = input.slice(tagNameStart, tagNameEnd).toLowerCase();
	}
	tagHasAttributes = false;
	textStart = nextPos;
	return nextPos;
};
370
+
371
/**
 * Emits the current end tag through `callbacks.closeTag` (when
 * registered) and resets per-tag state. If attributes were parsed on the
 * end tag, an `end-tag-with-attributes` warning is reported first.
 * `textStart` is set to the returned position.
 * @param {number} endPos end position
 * @returns {number} next position
 */
const emitCloseTag = (endPos) => {
	// Per WHATWG: an end tag emitted with attributes is a parse error.
	if (tagHasAttributes) {
		reportError("end-tag-with-attributes", tagStart, endPos, "warning");
	}
	// Without a callback the tokenizer simply continues at `endPos`.
	const nextPos =
		callbacks.closeTag === undefined
			? endPos
			: callbacks.closeTag(input, tagStart, endPos, tagNameStart, tagNameEnd);
	tagHasAttributes = false;
	textStart = nextPos;
	return nextPos;
};
394
+
395
+ while (pos < len) {
396
+ const cc = input.charCodeAt(pos);
397
+
398
+ // TODO: We don't handle all states here yet. In the future we will need to handle
399
+ // all of them, and when we move all the tokenizer we will remove it.
400
+ switch (state) {
401
+ // https://html.spec.whatwg.org/multipage/parsing.html#data-state
402
+ case STATE_DATA:
403
+ // Consume the next input character:
404
+ // U+003C LESS-THAN SIGN (<)
405
+ // Set the return state to the data state. Switch to the tag open state.
406
+ if (cc === CC_LESS_THAN) {
407
+ tagStart = pos;
408
+ state = STATE_TAG_OPEN;
409
+ pos++;
410
+ } else if (cc === CC_AMPERSAND) {
411
+ // U+0026 AMPERSAND (&)
412
+ // Set the return state to the data state. Switch to the
413
+ // character reference state.
414
+ returnState = STATE_DATA;
415
+ state = STATE_CHARACTER_REFERENCE;
416
+ pos++;
417
+ } else {
418
+ pos++;
419
+ }
420
+ break;
421
+
422
+ // https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
423
+ case STATE_TAG_OPEN:
424
+ // Consume the next input character:
425
+ // U+002F SOLIDUS (/)
426
+ // Switch to the end tag open state.
427
+ if (cc === CC_SOLIDUS) {
428
+ state = STATE_END_TAG_OPEN;
429
+ pos++;
430
+ } else if (cc === CC_EXCLAMATION_MARK) {
431
+ // U+0021 EXCLAMATION MARK (!)
432
+ // Switch to the markup declaration open state.
433
+ flushText(tagStart);
434
+ commentStart = tagStart;
435
+ state = STATE_MARKUP_DECLARATION_OPEN;
436
+ pos++;
437
+ } else if (isAsciiAlpha(cc)) {
438
+ // ASCII alpha
439
+ // Create a new start tag token, set its tag name to the empty string.
440
+ // Reconsume in the tag name state.
441
+ flushText(tagStart);
442
+ tagNameStart = pos;
443
+ state = STATE_TAG_NAME;
444
+ // Reconsume
445
+ } else if (cc === CC_QUESTION_MARK) {
446
+ // U+003F QUESTION MARK (?)
447
+ // This is an unexpected-question-mark-instead-of-tag-name parse error.
448
+ // Create a comment token whose data is the empty string. Reconsume in the
449
+ // bogus comment state.
450
+ reportError(
451
+ "unexpected-question-mark-instead-of-tag-name",
452
+ pos,
453
+ pos + 1,
454
+ "warning"
455
+ );
456
+ flushText(tagStart);
457
+ commentStart = tagStart;
458
+ state = STATE_BOGUS_COMMENT;
459
+ // Reconsume — let the bogus-comment state consume the `?`
460
+ // itself, matching the spec.
461
+ } else {
462
+ // Anything else
463
+ // This is an invalid-first-character-of-tag-name parse error. Emit a U+003C
464
+ // LESS-THAN SIGN character token. Reconsume in the data state.
465
+ reportError(
466
+ "invalid-first-character-of-tag-name",
467
+ pos,
468
+ pos + 1,
469
+ "warning"
470
+ );
471
+ state = STATE_DATA;
472
+ // Reconsume
473
+ }
474
+ break;
475
+
476
+ // https://html.spec.whatwg.org/multipage/parsing.html#end-tag-open-state
477
+ case STATE_END_TAG_OPEN:
478
+ // Consume the next input character:
479
+ // ASCII alpha
480
+ // Create a new end tag token, set its tag name to the empty string.
481
+ // Reconsume in the tag name state.
482
+ if (isAsciiAlpha(cc)) {
483
+ flushText(tagStart);
484
+ tagNameStart = pos;
485
+ state = STATE_TAG_NAME;
486
+ // Reconsume
487
+ } else if (cc === CC_GREATER_THAN) {
488
+ // U+003E GREATER-THAN SIGN (>)
489
+ // This is a missing-end-tag-name parse error. Switch to the data state.
490
+ reportError("missing-end-tag-name", pos, pos + 1, "warning");
491
+ state = STATE_DATA;
492
+ pos++;
493
+ } else {
494
+ // Anything else
495
+ // This is an invalid-first-character-of-tag-name parse error. Create a
496
+ // comment token whose data is the empty string. Reconsume in the bogus
497
+ // comment state.
498
+ reportError(
499
+ "invalid-first-character-of-tag-name",
500
+ pos,
501
+ pos + 1,
502
+ "warning"
503
+ );
504
+ flushText(tagStart);
505
+ commentStart = tagStart;
506
+ state = STATE_BOGUS_COMMENT;
507
+ // Reconsume — let bogus-comment consume this char itself.
508
+ }
509
+ break;
510
+
511
+ // https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state
512
+ case STATE_TAG_NAME:
513
+ // Consume the next input character:
514
+ // U+0009 CHARACTER TABULATION (tab)
515
+ // U+000A LINE FEED (LF)
516
+ // U+000C FORM FEED (FF)
517
+ // U+0020 SPACE
518
+ // Switch to the before attribute name state.
519
+ if (isSpace(cc)) {
520
+ tagNameEnd = pos;
521
+ state = STATE_BEFORE_ATTRIBUTE_NAME;
522
+ pos++;
523
+ } else if (cc === CC_SOLIDUS) {
524
+ // U+002F SOLIDUS (/)
525
+ // Switch to the self-closing start tag state.
526
+ tagNameEnd = pos;
527
+ state = STATE_SELF_CLOSING_START_TAG;
528
+ pos++;
529
+ } else if (cc === CC_GREATER_THAN) {
530
+ // U+003E GREATER-THAN SIGN (>)
531
+ // Switch to the data state. Emit the current tag token.
532
+ tagNameEnd = pos;
533
+ if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
534
+ state = STATE_DATA;
535
+ pos = emitCloseTag(pos + 1);
536
+ } else {
537
+ const nextPos = emitOpenTag(pos + 1, false);
538
+ state =
539
+ nextPos > pos + 1
540
+ ? STATE_DATA
541
+ : getContentModeForTag(lastOpenTagName);
542
+ pos = nextPos;
543
+ }
544
+ } else {
545
+ pos++;
546
+ }
547
+ break;
548
+
549
+ // https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
550
+ case STATE_BEFORE_ATTRIBUTE_NAME:
551
+ // Consume the next input character:
552
+ // U+0009 CHARACTER TABULATION (tab)
553
+ // U+000A LINE FEED (LF)
554
+ // U+000C FORM FEED (FF)
555
+ // U+0020 SPACE
556
+ // Ignore the character.
557
+ // (No reconsume here: the whitespace is consumed and the state is unchanged.)
558
+ if (isSpace(cc)) {
559
+ pos++;
560
+ } else if (cc === CC_SOLIDUS || cc === CC_GREATER_THAN) {
561
+ // U+002F SOLIDUS (/)
562
+ // U+003E GREATER-THAN SIGN (>)
563
+ // EOF
564
+ // Reconsume in the after attribute name state.
565
+ state = STATE_AFTER_ATTRIBUTE_NAME;
566
+ // Reconsume
567
+ } else if (cc === CC_EQUALS) {
568
+ // U+003D EQUALS SIGN (=)
569
+ // This is an unexpected-equals-sign-before-attribute-name parse
570
+ // error. Start a new attribute. Switch to the attribute name state.
571
+ reportError(
572
+ "unexpected-equals-sign-before-attribute-name",
573
+ pos,
574
+ pos + 1,
575
+ "warning"
576
+ );
577
+ attrNameStart = pos;
578
+ state = STATE_ATTRIBUTE_NAME;
579
+ pos++;
580
+ } else {
581
+ // Anything else
582
+ // Start a new attribute in the current tag token. Set that attribute name
583
+ // and value to the empty string. Reconsume in the attribute name state.
584
+ attrNameStart = pos;
585
+ state = STATE_ATTRIBUTE_NAME;
586
+ // Reconsume
587
+ }
588
+ break;
589
+
590
+ // https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
591
+ case STATE_ATTRIBUTE_NAME:
592
+ // Consume the next input character:
593
+ // U+0009 CHARACTER TABULATION (tab)
594
+ // U+000A LINE FEED (LF)
595
+ // U+000C FORM FEED (FF)
596
+ // U+0020 SPACE
597
+ // U+002F SOLIDUS (/)
598
+ // U+003E GREATER-THAN SIGN (>)
599
+ // EOF
600
+ // Reconsume in the after attribute name state.
601
+ if (isSpace(cc) || cc === CC_SOLIDUS || cc === CC_GREATER_THAN) {
602
+ attrNameEnd = pos;
603
+ state = STATE_AFTER_ATTRIBUTE_NAME;
604
+ // Reconsume
605
+ } else if (cc === CC_EQUALS) {
606
+ attrNameEnd = pos;
607
+ state = STATE_BEFORE_ATTRIBUTE_VALUE;
608
+ pos++;
609
+ } else {
610
+ pos++;
611
+ }
612
+ break;
613
+
614
+ // https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-name-state
615
+ case STATE_AFTER_ATTRIBUTE_NAME:
616
+ // Consume the next input character:
617
+ // U+0009 CHARACTER TABULATION (tab)
618
+ // U+000A LINE FEED (LF)
619
+ // U+000C FORM FEED (FF)
620
+ // U+0020 SPACE
621
+ // Ignore the character.
622
+ if (isSpace(cc)) {
623
+ pos++;
624
+ } else if (cc === CC_SOLIDUS) {
625
+ // U+002F SOLIDUS (/)
626
+ // Switch to the self-closing start tag state.
627
+ emitAttribute(pos);
628
+ state = STATE_SELF_CLOSING_START_TAG;
629
+ pos++;
630
+ } else if (cc === CC_EQUALS) {
631
+ // U+003D EQUALS SIGN (=)
632
+ // Switch to the before attribute value state.
633
+ state = STATE_BEFORE_ATTRIBUTE_VALUE;
634
+ pos++;
635
+ } else if (cc === CC_GREATER_THAN) {
636
+ // U+003E GREATER-THAN SIGN (>)
637
+ // Switch to the data state. Emit the current tag token.
638
+ emitAttribute(pos);
639
+ if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
640
+ state = STATE_DATA;
641
+ pos = emitCloseTag(pos + 1);
642
+ } else {
643
+ const nextPos = emitOpenTag(pos + 1, false);
644
+ state =
645
+ nextPos > pos + 1
646
+ ? STATE_DATA
647
+ : getContentModeForTag(lastOpenTagName);
648
+ pos = nextPos;
649
+ }
650
+ } else {
651
+ // Anything else
652
+ // Start a new attribute in the current tag token.
653
+ emitAttribute(pos);
654
+ attrNameStart = pos;
655
+ state = STATE_ATTRIBUTE_NAME;
656
+ // Reconsume
657
+ }
658
+ break;
659
+
660
+ // https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-value-state
661
+ case STATE_BEFORE_ATTRIBUTE_VALUE:
662
+ // Consume the next input character:
663
+ // U+0009 CHARACTER TABULATION (tab)
664
+ // U+000A LINE FEED (LF)
665
+ // U+000C FORM FEED (FF)
666
+ // U+0020 SPACE
667
+ // Ignore the character.
668
+ if (isSpace(cc)) {
669
+ pos++;
670
+ } else if (cc === CC_QUOTATION_MARK) {
671
+ // U+0022 QUOTATION MARK (")
672
+ // Switch to the attribute value (double-quoted) state.
673
+ attrValueStart = pos + 1;
674
+ attrQuoteType = QUOTE_DOUBLE;
675
+ state = STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED;
676
+ pos++;
677
+ } else if (cc === CC_APOSTROPHE) {
678
+ // U+0027 APOSTROPHE (')
679
+ // Switch to the attribute value (single-quoted) state.
680
+ attrValueStart = pos + 1;
681
+ attrQuoteType = QUOTE_SINGLE;
682
+ state = STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED;
683
+ pos++;
684
+ } else if (cc === CC_GREATER_THAN) {
685
+ // U+003E GREATER-THAN SIGN (>)
686
+ // This is a missing-attribute-value parse error. Switch to the data
687
+ // state. Emit the current tag token. The attribute is reported with
688
+ // an empty value range pointing at the `>` so the open-tag offset range
689
+ // still includes the `>`.
690
+ reportError("missing-attribute-value", pos, pos + 1, "warning");
691
+ attrValueStart = pos;
692
+ attrQuoteType = QUOTE_NONE;
693
+ pos = emitAttribute(pos);
694
+ if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
695
+ state = STATE_DATA;
696
+ pos = emitCloseTag(pos + 1);
697
+ } else {
698
+ const nextPos = emitOpenTag(pos + 1, false);
699
+ state =
700
+ nextPos > pos + 1
701
+ ? STATE_DATA
702
+ : getContentModeForTag(lastOpenTagName);
703
+ pos = nextPos;
704
+ }
705
+ } else {
706
+ // Anything else
707
+ // Reconsume in the attribute value (unquoted) state.
708
+ attrValueStart = pos;
709
+ attrQuoteType = QUOTE_NONE;
710
+ state = STATE_ATTRIBUTE_VALUE_UNQUOTED;
711
+ // Reconsume
712
+ }
713
+ break;
714
+
715
+ // https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(double-quoted)-state
716
+ case STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED:
717
+ // Consume the next input character:
718
+ // U+0022 QUOTATION MARK (")
719
+ // Switch to the after attribute value (quoted) state.
720
+ if (cc === CC_QUOTATION_MARK) {
721
+ pos = emitAttribute(pos);
722
+ state = STATE_AFTER_ATTRIBUTE_VALUE_QUOTED;
723
+ } else if (cc === CC_AMPERSAND) {
724
+ // U+0026 AMPERSAND (&)
725
+ // Set the return state to the attribute value (double-quoted)
726
+ // state. Switch to the character reference state.
727
+ returnState = STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED;
728
+ state = STATE_CHARACTER_REFERENCE;
729
+ pos++;
730
+ } else {
731
+ pos++;
732
+ }
733
+ break;
734
+
735
+ // https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(single-quoted)-state
736
+ case STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED:
737
+ // Consume the next input character:
738
+ // U+0027 APOSTROPHE (')
739
+ // Switch to the after attribute value (quoted) state.
740
+ if (cc === CC_APOSTROPHE) {
741
+ pos = emitAttribute(pos);
742
+ state = STATE_AFTER_ATTRIBUTE_VALUE_QUOTED;
743
+ } else if (cc === CC_AMPERSAND) {
744
+ // U+0026 AMPERSAND (&)
745
+ // Set the return state to the attribute value (single-quoted)
746
+ // state. Switch to the character reference state.
747
+ returnState = STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED;
748
+ state = STATE_CHARACTER_REFERENCE;
749
+ pos++;
750
+ } else {
751
+ pos++;
752
+ }
753
+ break;
754
+
755
+ // https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(unquoted)-state
756
+ case STATE_ATTRIBUTE_VALUE_UNQUOTED:
757
+ if (isSpace(cc)) {
758
+ pos = emitAttribute(pos);
759
+ state = STATE_BEFORE_ATTRIBUTE_NAME;
760
+ // Reconsume so space is handled in BEFORE_ATTRIBUTE_NAME
761
+ } else if (cc === CC_GREATER_THAN) {
762
+ // U+003E GREATER-THAN SIGN (>)
763
+ // Switch to the data state. (Unlike before-attribute-value, `>` here is not a parse error.)
764
+ // Emit the current tag token.
765
+ pos = emitAttribute(pos);
766
+ if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
767
+ state = STATE_DATA;
768
+ pos = emitCloseTag(pos + 1);
769
+ } else {
770
+ const nextPos = emitOpenTag(pos + 1, false);
771
+ state =
772
+ nextPos > pos + 1
773
+ ? STATE_DATA
774
+ : getContentModeForTag(lastOpenTagName);
775
+ pos = nextPos;
776
+ }
777
+ } else if (cc === CC_AMPERSAND) {
778
+ // U+0026 AMPERSAND (&)
779
+ // Set the return state to the attribute value (unquoted)
780
+ // state. Switch to the character reference state.
781
+ returnState = STATE_ATTRIBUTE_VALUE_UNQUOTED;
782
+ state = STATE_CHARACTER_REFERENCE;
783
+ pos++;
784
+ } else {
785
+ pos++;
786
+ }
787
+ break;
788
+
789
+ // https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-value-(quoted)-state
790
+ case STATE_AFTER_ATTRIBUTE_VALUE_QUOTED:
791
+ // Consume the next input character:
792
+ // U+0009 CHARACTER TABULATION (tab)
793
+ // U+000A LINE FEED (LF)
794
+ // U+000C FORM FEED (FF)
795
+ // U+0020 SPACE
796
+ // Switch to the before attribute name state.
797
+ if (isSpace(cc)) {
798
+ state = STATE_BEFORE_ATTRIBUTE_NAME;
799
+ pos++;
800
+ } else if (cc === CC_SOLIDUS) {
801
+ // U+002F SOLIDUS (/)
802
+ // Switch to the self-closing start tag state.
803
+ state = STATE_SELF_CLOSING_START_TAG;
804
+ pos++;
805
+ } else if (cc === CC_GREATER_THAN) {
806
+ if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
807
+ state = STATE_DATA;
808
+ pos = emitCloseTag(pos + 1);
809
+ } else {
810
+ const nextPos = emitOpenTag(pos + 1, false);
811
+ state =
812
+ nextPos > pos + 1
813
+ ? STATE_DATA
814
+ : getContentModeForTag(lastOpenTagName);
815
+ pos = nextPos;
816
+ }
817
+ } else {
818
+ // Anything else
819
+ // This is a missing-whitespace-between-attributes parse error. Reconsume in
820
+ // the before attribute name state.
821
+ reportError(
822
+ "missing-whitespace-between-attributes",
823
+ pos,
824
+ pos + 1,
825
+ "warning"
826
+ );
827
+ state = STATE_BEFORE_ATTRIBUTE_NAME;
828
+ // Reconsume
829
+ }
830
+ break;
831
+
832
+ // https://html.spec.whatwg.org/multipage/parsing.html#self-closing-start-tag-state
833
+ case STATE_SELF_CLOSING_START_TAG:
834
+ // Consume the next input character:
835
+ // U+003E GREATER-THAN SIGN (>)
836
+ // Set the self-closing flag of the current tag token. Switch to the data
837
+ // state. Emit the current tag token.
838
+ if (cc === CC_GREATER_THAN) {
839
+ if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
840
+ state = STATE_DATA;
841
+ pos = emitCloseTag(pos + 1);
842
+ } else {
843
+ pos = emitOpenTag(pos + 1, true);
844
+ state = STATE_DATA;
845
+ }
846
+ } else {
847
+ // Anything else
848
+ // This is an unexpected-solidus-in-tag parse error. Reconsume in the before
849
+ // attribute name state.
850
+ reportError("unexpected-solidus-in-tag", pos, pos + 1, "warning");
851
+ state = STATE_BEFORE_ATTRIBUTE_NAME;
852
+ // Reconsume
853
+ }
854
+ break;
855
+
856
+ // https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
857
+ case STATE_MARKUP_DECLARATION_OPEN:
858
+ // If the next few characters are:
859
+ // Two U+002D HYPHEN-MINUS characters (-)
860
+ // Consume those two characters, create a comment token whose data
861
+ // is the empty string, and switch to the comment start state.
862
+ if (
863
+ cc === CC_HYPHEN_MINUS &&
864
+ input.charCodeAt(pos + 1) === CC_HYPHEN_MINUS
865
+ ) {
866
+ pos += 2;
867
+ commentStart = tagStart;
868
+ state = STATE_COMMENT_START;
869
+ } else if (
870
+ // ASCII case-insensitive match for the word "DOCTYPE"
871
+ // Consume those characters and switch to the DOCTYPE state.
872
+ (cc === 0x44 || cc === 0x64) /* D or d */ &&
873
+ (input.charCodeAt(pos + 1) | 0x20) === 0x6f /* o */ &&
874
+ (input.charCodeAt(pos + 2) | 0x20) === 0x63 /* c */ &&
875
+ (input.charCodeAt(pos + 3) | 0x20) === 0x74 /* t */ &&
876
+ (input.charCodeAt(pos + 4) | 0x20) === 0x79 /* y */ &&
877
+ (input.charCodeAt(pos + 5) | 0x20) === 0x70 /* p */ &&
878
+ (input.charCodeAt(pos + 6) | 0x20) === 0x65 /* e */
879
+ ) {
880
+ pos += 7;
881
+ commentStart = tagStart;
882
+ state = STATE_DOCTYPE;
883
+ } else if (
884
+ // The string "[CDATA[" (the five uppercase letters "CDATA" with a
885
+ // U+005B LEFT SQUARE BRACKET character before and after)
886
+ // Consume those characters and switch to the CDATA section state.
887
+ cc === CC_LEFT_SQUARE_BRACKET &&
888
+ input.charCodeAt(pos + 1) === 0x43 /* C */ &&
889
+ input.charCodeAt(pos + 2) === 0x44 /* D */ &&
890
+ input.charCodeAt(pos + 3) === 0x41 /* A */ &&
891
+ input.charCodeAt(pos + 4) === 0x54 /* T */ &&
892
+ input.charCodeAt(pos + 5) === 0x41 /* A */ &&
893
+ input.charCodeAt(pos + 6) === CC_LEFT_SQUARE_BRACKET
894
+ ) {
895
+ pos += 7;
896
+ commentStart = tagStart;
897
+ state = STATE_CDATA_SECTION;
898
+ } else {
899
+ // Anything else
900
+ // This is an incorrectly-opened-comment parse error. Create a comment token
901
+ // whose data is the empty string. Switch to the bogus comment state (don't
902
+ // consume anything in the current state).
903
+ reportError("incorrectly-opened-comment", tagStart, pos, "warning");
904
+ commentStart = tagStart;
905
+ state = STATE_BOGUS_COMMENT;
906
+ // Reconsume
907
+ }
908
+ break;
909
+
910
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state
911
+ case STATE_COMMENT_START:
912
+ // Consume the next input character:
913
+ // U+002D HYPHEN-MINUS (-)
914
+ // Switch to the comment start dash state.
915
+ if (cc === CC_HYPHEN_MINUS) {
916
+ state = STATE_COMMENT_START_DASH;
917
+ pos++;
918
+ } else if (cc === CC_GREATER_THAN) {
919
+ // U+003E GREATER-THAN SIGN (>)
920
+ // This is an abrupt-closing-of-empty-comment parse error. Switch to the
921
+ // data state. Emit the current comment token.
922
+ reportError(
923
+ "abrupt-closing-of-empty-comment",
924
+ pos,
925
+ pos + 1,
926
+ "warning"
927
+ );
928
+ let nextPos = pos + 1;
929
+ if (callbacks.comment !== undefined) {
930
+ nextPos = callbacks.comment(input, commentStart, pos + 1);
931
+ }
932
+ state = STATE_DATA;
933
+ textStart = nextPos;
934
+ pos = nextPos;
935
+ } else {
936
+ // Anything else
937
+ // Reconsume in the comment state.
938
+ state = STATE_COMMENT;
939
+ // Reconsume
940
+ }
941
+ break;
942
+
943
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-start-dash-state
944
+ case STATE_COMMENT_START_DASH:
945
+ // Consume the next input character:
946
+ // U+002D HYPHEN-MINUS (-)
947
+ // Switch to the comment end state.
948
+ if (cc === CC_HYPHEN_MINUS) {
949
+ state = STATE_COMMENT_END;
950
+ pos++;
951
+ } else if (cc === CC_GREATER_THAN) {
952
+ // U+003E GREATER-THAN SIGN (>)
953
+ // This is an abrupt-closing-of-empty-comment parse error. Switch to the
954
+ // data state. Emit the current comment token.
955
+ reportError(
956
+ "abrupt-closing-of-empty-comment",
957
+ pos,
958
+ pos + 1,
959
+ "warning"
960
+ );
961
+ let nextPos = pos + 1;
962
+ if (callbacks.comment !== undefined) {
963
+ nextPos = callbacks.comment(input, commentStart, pos + 1);
964
+ }
965
+ state = STATE_DATA;
966
+ textStart = nextPos;
967
+ pos = nextPos;
968
+ } else {
969
+ // Anything else
970
+ // Append a U+002D HYPHEN-MINUS character (-) to the comment token's data.
971
+ // Reconsume in the comment state.
972
+ state = STATE_COMMENT;
973
+ // Reconsume
974
+ }
975
+ break;
976
+
977
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-state
978
+ case STATE_COMMENT:
979
+ // Consume the next input character:
980
+ // U+003C LESS-THAN SIGN (<)
981
+ // Append a U+003C LESS-THAN SIGN character to the comment token's data. Switch to the comment less-than sign state.
982
+ if (cc === CC_LESS_THAN) {
983
+ state = STATE_COMMENT_LESS_THAN_SIGN;
984
+ pos++;
985
+ } else if (cc === CC_HYPHEN_MINUS) {
986
+ // Consume the next input character:
987
+ // U+002D HYPHEN-MINUS (-)
988
+ // Switch to the comment end dash state.
989
+ state = STATE_COMMENT_END_DASH;
990
+ pos++;
991
+ } else {
992
+ pos++;
993
+ }
994
+ break;
995
+
996
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-end-dash-state
997
+ case STATE_COMMENT_END_DASH:
998
+ // Consume the next input character:
999
+ // U+002D HYPHEN-MINUS (-)
1000
+ // Switch to the comment end state.
1001
+ if (cc === CC_HYPHEN_MINUS) {
1002
+ state = STATE_COMMENT_END;
1003
+ pos++;
1004
+ } else {
1005
+ // Anything else
1006
+ // Append a U+002D HYPHEN-MINUS character (-) to the comment token's data.
1007
+ // Reconsume in the comment state.
1008
+ state = STATE_COMMENT;
1009
+ pos++;
1010
+ }
1011
+ break;
1012
+
1013
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-end-state
1014
+ case STATE_COMMENT_END:
1015
+ // Consume the next input character:
1016
+ // U+003E GREATER-THAN SIGN (>)
1017
+ // Switch to the data state. Emit the current comment token.
1018
+ if (cc === CC_GREATER_THAN) {
1019
+ let nextPos = pos + 1;
1020
+ if (callbacks.comment !== undefined) {
1021
+ nextPos = callbacks.comment(input, commentStart, pos + 1);
1022
+ }
1023
+ state = STATE_DATA;
1024
+ textStart = nextPos;
1025
+ pos = nextPos;
1026
+ } else if (cc === CC_EXCLAMATION_MARK) {
1027
+ // U+0021 EXCLAMATION MARK (!)
1028
+ // Switch to the comment end bang state.
1029
+ state = STATE_COMMENT_END_BANG;
1030
+ pos++;
1031
+ } else if (cc === CC_HYPHEN_MINUS) {
1032
+ pos++;
1033
+ } else {
1034
+ // Anything else
1035
+ // Append two U+002D HYPHEN-MINUS characters (-) to the comment token's
1036
+ // data. Reconsume in the comment state.
1037
+ state = STATE_COMMENT;
1038
+ pos++;
1039
+ }
1040
+ break;
1041
+
1042
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-end-bang-state
1043
+ case STATE_COMMENT_END_BANG:
1044
+ // Consume the next input character:
1045
+ // U+002D HYPHEN-MINUS (-)
1046
+ // Append two U+002D HYPHEN-MINUS characters (-) and a U+0021 EXCLAMATION
1047
+ // MARK character (!) to the comment token's data. Switch to the comment end
1048
+ // dash state.
1049
+ if (cc === CC_HYPHEN_MINUS) {
1050
+ state = STATE_COMMENT_END_DASH;
1051
+ pos++;
1052
+ } else if (cc === CC_GREATER_THAN) {
1053
+ // U+003E GREATER-THAN SIGN (>)
1054
+ // This is an incorrectly-closed-comment parse error. Switch to the data
1055
+ // state. Emit the current comment token.
1056
+ reportError("incorrectly-closed-comment", pos, pos + 1, "warning");
1057
+ let nextPos = pos + 1;
1058
+ if (callbacks.comment !== undefined) {
1059
+ nextPos = callbacks.comment(input, commentStart, pos + 1);
1060
+ }
1061
+ state = STATE_DATA;
1062
+ textStart = nextPos;
1063
+ pos = nextPos;
1064
+ } else {
1065
+ // Anything else
1066
+ // Append two U+002D HYPHEN-MINUS characters (-) and a U+0021 EXCLAMATION
1067
+ // MARK character (!) to the comment token's data. Reconsume in the comment
1068
+ // state.
1069
+ state = STATE_COMMENT;
1070
+ pos++;
1071
+ }
1072
+ break;
1073
+
1074
+ // https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state
1075
+ case STATE_BOGUS_COMMENT:
1076
+ // Consume the next input character:
1077
+ // U+003E GREATER-THAN SIGN (>)
1078
+ // Switch to the data state. Emit the current comment token.
1079
+ if (cc === CC_GREATER_THAN) {
1080
+ let nextPos = pos + 1;
1081
+ if (callbacks.comment !== undefined) {
1082
+ nextPos = callbacks.comment(input, commentStart, pos + 1);
1083
+ }
1084
+ state = STATE_DATA;
1085
+ textStart = nextPos;
1086
+ pos = nextPos;
1087
+ } else {
1088
+ pos++;
1089
+ }
1090
+ break;
1091
+
1092
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-state
1093
+ case STATE_COMMENT_LESS_THAN_SIGN:
1094
+ // Consume the next input character:
1095
+ // U+0021 EXCLAMATION MARK (!)
1096
+ // Append the current input character to the comment token's data. Switch to
1097
+ // the comment less-than sign bang state.
1098
+ if (cc === CC_EXCLAMATION_MARK) {
1099
+ state = STATE_COMMENT_LESS_THAN_SIGN_BANG;
1100
+ pos++;
1101
+ } else if (cc === CC_LESS_THAN) {
1102
+ // U+003C LESS-THAN SIGN (<)
1103
+ // Append the current input character to the comment token's data.
1104
+ pos++;
1105
+ } else {
1106
+ // Anything else
1107
+ // Reconsume in the comment state.
1108
+ state = STATE_COMMENT;
1109
+ // Reconsume
1110
+ }
1111
+ break;
1112
+
1113
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-bang-state
1114
+ case STATE_COMMENT_LESS_THAN_SIGN_BANG:
1115
+ // Consume the next input character:
1116
+ // U+002D HYPHEN-MINUS (-)
1117
+ // Switch to the comment less-than sign bang dash state.
1118
+ if (cc === CC_HYPHEN_MINUS) {
1119
+ state = STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH;
1120
+ pos++;
1121
+ } else {
1122
+ // Anything else
1123
+ // Reconsume in the comment state.
1124
+ state = STATE_COMMENT;
1125
+ // Reconsume
1126
+ }
1127
+ break;
1128
+
1129
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-bang-dash-state
1130
+ case STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH:
1131
+ // Consume the next input character:
1132
+ // U+002D HYPHEN-MINUS (-)
1133
+ // Switch to the comment less-than sign bang dash dash state.
1134
+ if (cc === CC_HYPHEN_MINUS) {
1135
+ state = STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH;
1136
+ pos++;
1137
+ } else {
1138
+ // Anything else
1139
+ // Reconsume in the comment end dash state.
1140
+ state = STATE_COMMENT_END_DASH;
1141
+ // Reconsume
1142
+ }
1143
+ break;
1144
+
1145
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-bang-dash-dash-state
1146
+ case STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH:
1147
+ // Consume the next input character:
1148
+ // U+003E GREATER-THAN SIGN (>)
1149
+ // EOF
1150
+ // Reconsume in the comment end state.
1151
+ // Anything else
1152
+ // This is a nested-comment parse error. Reconsume in the comment end state.
1153
+ if (cc !== CC_GREATER_THAN) {
1154
+ reportError("nested-comment", pos, pos + 1, "warning");
1155
+ }
1156
+ state = STATE_COMMENT_END;
1157
+ // Reconsume
1158
+ break;
1159
+
1160
+ // https://html.spec.whatwg.org/multipage/parsing.html#doctype-state
1161
+ case STATE_DOCTYPE:
1162
+ // Consume the next input character:
1163
+ // U+0009 CHARACTER TABULATION (tab)
1164
+ // U+000A LINE FEED (LF)
1165
+ // U+000C FORM FEED (FF)
1166
+ // U+0020 SPACE
1167
+ // Switch to the before DOCTYPE name state.
1168
+ if (isSpace(cc)) {
1169
+ state = STATE_BEFORE_DOCTYPE_NAME;
1170
+ pos++;
1171
+ } else if (cc === CC_GREATER_THAN) {
1172
+ // U+003E GREATER-THAN SIGN (>)
1173
+ // Reconsume in the before DOCTYPE name state.
1174
+ state = STATE_BEFORE_DOCTYPE_NAME;
1175
+ } else {
1176
+ // Anything else
1177
+ // This is a missing-whitespace-before-doctype-name parse error. Reconsume
1178
+ // in the before DOCTYPE name state.
1179
+ reportError(
1180
+ "missing-whitespace-before-doctype-name",
1181
+ pos,
1182
+ pos + 1,
1183
+ "warning"
1184
+ );
1185
+ state = STATE_BEFORE_DOCTYPE_NAME;
1186
+ }
1187
+ break;
1188
+
1189
+ // https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-name-state
1190
+ case STATE_BEFORE_DOCTYPE_NAME:
1191
+ // Consume the next input character:
1192
+ // U+0009 CHARACTER TABULATION (tab)
1193
+ // U+000A LINE FEED (LF)
1194
+ // U+000C FORM FEED (FF)
1195
+ // U+0020 SPACE
1196
+ // Ignore the character.
1197
+ if (isSpace(cc)) {
1198
+ pos++;
1199
+ } else if (cc === 0x00) {
1200
+ // U+0000 NULL
1201
+ // This is an unexpected-null-character parse error. Create a new DOCTYPE
1202
+ // token. Set the token's name to a U+FFFD REPLACEMENT CHARACTER character.
1203
+ // Switch to the DOCTYPE name state.
1204
+ state = STATE_DOCTYPE_NAME;
1205
+ pos++;
1206
+ } else if (cc === CC_GREATER_THAN) {
1207
+ // U+003E GREATER-THAN SIGN (>)
1208
+ // This is a missing-doctype-name parse error. Create a new DOCTYPE token.
1209
+ // Set its force-quirks flag to on. Switch to the data state. Emit the
1210
+ // current token.
1211
+ reportError("missing-doctype-name", pos, pos + 1, "warning");
1212
+ let nextPos = pos + 1;
1213
+ if (callbacks.doctype !== undefined) {
1214
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1215
+ }
1216
+ state = STATE_DATA;
1217
+ textStart = nextPos;
1218
+ pos = nextPos;
1219
+ } else {
1220
+ // ASCII upper alpha
1221
+ // Create a new DOCTYPE token. Set the token's name to the lowercase version
1222
+ // of the current input character (add 0x0020 to the character's code
1223
+ // point). Switch to the DOCTYPE name state.
1224
+ // Anything else
1225
+ // Create a new DOCTYPE token. Set the token's name to the current input
1226
+ // character. Switch to the DOCTYPE name state.
1227
+ state = STATE_DOCTYPE_NAME;
1228
+ pos++;
1229
+ }
1230
+ break;
1231
+
1232
+ // https://html.spec.whatwg.org/multipage/parsing.html#doctype-name-state
1233
+ case STATE_DOCTYPE_NAME:
1234
+ // Consume the next input character:
1235
+ // U+0009 CHARACTER TABULATION (tab)
1236
+ // U+000A LINE FEED (LF)
1237
+ // U+000C FORM FEED (FF)
1238
+ // U+0020 SPACE
1239
+ // Switch to the after DOCTYPE name state.
1240
+ if (isSpace(cc)) {
1241
+ state = STATE_AFTER_DOCTYPE_NAME;
1242
+ pos++;
1243
+ } else if (cc === CC_GREATER_THAN) {
1244
+ // U+003E GREATER-THAN SIGN (>)
1245
+ // Switch to the data state. Emit the current DOCTYPE token.
1246
+ let nextPos = pos + 1;
1247
+ if (callbacks.doctype !== undefined) {
1248
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1249
+ }
1250
+ state = STATE_DATA;
1251
+ textStart = nextPos;
1252
+ pos = nextPos;
1253
+ } else if (cc === 0x00) {
1254
+ // U+0000 NULL
1255
+ // This is an unexpected-null-character parse error. Append a U+FFFD
1256
+ // REPLACEMENT CHARACTER character to the current DOCTYPE token's name.
1257
+ pos++;
1258
+ } else {
1259
+ // ASCII upper alpha
1260
+ // Append the lowercase version of the current input character (add 0x0020
1261
+ // to the character's code point) to the current DOCTYPE token's name.
1262
+ // Anything else
1263
+ // Append the current input character to the current DOCTYPE token's name.
1264
+ pos++;
1265
+ }
1266
+ break;
1267
+
1268
+ // https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-name-state
1269
+ case STATE_AFTER_DOCTYPE_NAME:
1270
+ // Consume the next input character:
1271
+ if (isSpace(cc)) {
1272
+ // U+0009 CHARACTER TABULATION (tab)
1273
+ // U+000A LINE FEED (LF)
1274
+ // U+000C FORM FEED (FF)
1275
+ // U+0020 SPACE
1276
+ // Ignore the character.
1277
+ pos++;
1278
+ } else if (cc === CC_GREATER_THAN) {
1279
+ // U+003E GREATER-THAN SIGN (>)
1280
+ // Switch to the data state. Emit the current DOCTYPE token.
1281
+ let nextPos = pos + 1;
1282
+ if (callbacks.doctype !== undefined) {
1283
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1284
+ }
1285
+ state = STATE_DATA;
1286
+ textStart = nextPos;
1287
+ pos = nextPos;
1288
+ } else if (
1289
+ pos + 5 < len &&
1290
+ (cc === 0x50 || cc === 0x70) /* P or p */ &&
1291
+ (input.charCodeAt(pos + 1) | 0x20) === 0x75 /* u */ &&
1292
+ (input.charCodeAt(pos + 2) | 0x20) === 0x62 /* b */ &&
1293
+ (input.charCodeAt(pos + 3) | 0x20) === 0x6c /* l */ &&
1294
+ (input.charCodeAt(pos + 4) | 0x20) === 0x69 /* i */ &&
1295
+ (input.charCodeAt(pos + 5) | 0x20) === 0x63 /* c */
1296
+ ) {
1297
+ // ASCII case-insensitive match for the word "PUBLIC"
1298
+ pos += 6;
1299
+ state = STATE_AFTER_DOCTYPE_PUBLIC_KEYWORD;
1300
+ } else if (
1301
+ pos + 5 < len &&
1302
+ (cc === 0x53 || cc === 0x73) /* S or s */ &&
1303
+ (input.charCodeAt(pos + 1) | 0x20) === 0x79 /* y */ &&
1304
+ (input.charCodeAt(pos + 2) | 0x20) === 0x73 /* s */ &&
1305
+ (input.charCodeAt(pos + 3) | 0x20) === 0x74 /* t */ &&
1306
+ (input.charCodeAt(pos + 4) | 0x20) === 0x65 /* e */ &&
1307
+ (input.charCodeAt(pos + 5) | 0x20) === 0x6d /* m */
1308
+ ) {
1309
+ // ASCII case-insensitive match for the word "SYSTEM"
1310
+ pos += 6;
1311
+ state = STATE_AFTER_DOCTYPE_SYSTEM_KEYWORD;
1312
+ } else {
1313
+ // Anything else
1314
+ // This is an invalid-character-sequence-after-doctype-name parse error. Set
1315
+ // the current DOCTYPE token's force-quirks flag to on. Reconsume in the
1316
+ // bogus DOCTYPE state.
1317
+ reportError(
1318
+ "invalid-character-sequence-after-doctype-name",
1319
+ pos,
1320
+ pos + 1,
1321
+ "warning"
1322
+ );
1323
+ state = STATE_BOGUS_DOCTYPE;
1324
+ }
1325
+ break;
1326
+
1327
+ // https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-public-keyword-state
1328
+ case STATE_AFTER_DOCTYPE_PUBLIC_KEYWORD:
1329
+ // Consume the next input character:
1330
+ if (isSpace(cc)) {
1331
+ // U+0009 CHARACTER TABULATION (tab)
1332
+ // U+000A LINE FEED (LF)
1333
+ // U+000C FORM FEED (FF)
1334
+ // U+0020 SPACE
1335
+ // Switch to the before DOCTYPE public identifier state.
1336
+ state = STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
1337
+ pos++;
1338
+ } else if (cc === CC_QUOTATION_MARK) {
1339
+ // U+0022 QUOTATION MARK (")
1340
+ // This is a missing-whitespace-after-doctype-public-keyword parse error.
1341
+ // Set the current DOCTYPE token's public identifier to the empty string
1342
+ // (not missing), then switch to the DOCTYPE public identifier
1343
+ // (double-quoted) state.
1344
+ reportError(
1345
+ "missing-whitespace-after-doctype-public-keyword",
1346
+ pos,
1347
+ pos + 1,
1348
+ "warning"
1349
+ );
1350
+ state = STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
1351
+ pos++;
1352
+ } else if (cc === CC_APOSTROPHE) {
1353
+ // U+0027 APOSTROPHE (')
1354
+ // This is a missing-whitespace-after-doctype-public-keyword parse error.
1355
+ // Set the current DOCTYPE token's public identifier to the empty string
1356
+ // (not missing), then switch to the DOCTYPE public identifier
1357
+ // (single-quoted) state.
1358
+ reportError(
1359
+ "missing-whitespace-after-doctype-public-keyword",
1360
+ pos,
1361
+ pos + 1,
1362
+ "warning"
1363
+ );
1364
+ state = STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
1365
+ pos++;
1366
+ } else if (cc === CC_GREATER_THAN) {
1367
+ // U+003E GREATER-THAN SIGN (>)
1368
+ // This is a missing-doctype-public-identifier parse error. Set the current
1369
+ // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1370
+ // the current DOCTYPE token.
1371
+ reportError(
1372
+ "missing-doctype-public-identifier",
1373
+ pos,
1374
+ pos + 1,
1375
+ "warning"
1376
+ );
1377
+ let nextPos = pos + 1;
1378
+ if (callbacks.doctype !== undefined) {
1379
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1380
+ }
1381
+ state = STATE_DATA;
1382
+ textStart = nextPos;
1383
+ pos = nextPos;
1384
+ } else {
1385
+ // Anything else
1386
+ // This is a missing-quote-before-doctype-public-identifier parse error. Set
1387
+ // the current DOCTYPE token's force-quirks flag to on. Reconsume in the
1388
+ // bogus DOCTYPE state.
1389
+ reportError(
1390
+ "missing-quote-before-doctype-public-identifier",
1391
+ pos,
1392
+ pos + 1,
1393
+ "warning"
1394
+ );
1395
+ state = STATE_BOGUS_DOCTYPE;
1396
+ }
1397
+ break;
1398
+
1399
+ // https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-public-identifier-state
1400
+ case STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
1401
+ // Consume the next input character:
1402
+ if (isSpace(cc)) {
1403
+ // U+0009 CHARACTER TABULATION (tab)
1404
+ // U+000A LINE FEED (LF)
1405
+ // U+000C FORM FEED (FF)
1406
+ // U+0020 SPACE
1407
+ // Ignore the character.
1408
+ pos++;
1409
+ } else if (cc === CC_QUOTATION_MARK) {
1410
+ // U+0022 QUOTATION MARK (")
1411
+ // Set the current DOCTYPE token's public identifier to the empty string
1412
+ // (not missing), then switch to the DOCTYPE public identifier
1413
+ // (double-quoted) state.
1414
+ state = STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
1415
+ pos++;
1416
+ } else if (cc === CC_APOSTROPHE) {
1417
+ // U+0027 APOSTROPHE (')
1418
+ // Set the current DOCTYPE token's public identifier to the empty string
1419
+ // (not missing), then switch to the DOCTYPE public identifier
1420
+ // (single-quoted) state.
1421
+ state = STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
1422
+ pos++;
1423
+ } else if (cc === CC_GREATER_THAN) {
1424
+ // U+003E GREATER-THAN SIGN (>)
1425
+ // This is a missing-doctype-public-identifier parse error. Set the current
1426
+ // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1427
+ // the current DOCTYPE token.
1428
+ reportError(
1429
+ "missing-doctype-public-identifier",
1430
+ pos,
1431
+ pos + 1,
1432
+ "warning"
1433
+ );
1434
+ let nextPos = pos + 1;
1435
+ if (callbacks.doctype !== undefined) {
1436
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1437
+ }
1438
+ state = STATE_DATA;
1439
+ textStart = nextPos;
1440
+ pos = nextPos;
1441
+ } else {
1442
+ // Anything else
1443
+ // This is a missing-quote-before-doctype-public-identifier parse error. Set
1444
+ // the current DOCTYPE token's force-quirks flag to on. Reconsume in the
1445
+ // bogus DOCTYPE state.
1446
+ reportError(
1447
+ "missing-quote-before-doctype-public-identifier",
1448
+ pos,
1449
+ pos + 1,
1450
+ "warning"
1451
+ );
1452
+ state = STATE_BOGUS_DOCTYPE;
1453
+ }
1454
+ break;
1455
+
1456
+ // https://html.spec.whatwg.org/multipage/parsing.html#doctype-public-identifier-(double-quoted)-state
1457
+ case STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
1458
+ // Consume the next input character:
1459
+ if (cc === CC_QUOTATION_MARK) {
1460
+ // U+0022 QUOTATION MARK (")
1461
+ // Switch to the after DOCTYPE public identifier state.
1462
+ state = STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
1463
+ pos++;
1464
+ } else if (cc === 0x00) {
1465
+ // U+0000 NULL
1466
+ // This is an unexpected-null-character parse error. Append a U+FFFD
1467
+ // REPLACEMENT CHARACTER character to the current DOCTYPE token's public
1468
+ // identifier.
1469
+ pos++;
1470
+ } else if (cc === CC_GREATER_THAN) {
1471
+ // U+003E GREATER-THAN SIGN (>)
1472
+ // This is an abrupt-doctype-public-identifier parse error. Set the current
1473
+ // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1474
+ // the current DOCTYPE token.
1475
+ reportError(
1476
+ "abrupt-doctype-public-identifier",
1477
+ pos,
1478
+ pos + 1,
1479
+ "warning"
1480
+ );
1481
+ let nextPos = pos + 1;
1482
+ if (callbacks.doctype !== undefined) {
1483
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1484
+ }
1485
+ state = STATE_DATA;
1486
+ textStart = nextPos;
1487
+ pos = nextPos;
1488
+ } else {
1489
+ // Anything else
1490
+ // Append the current input character to the current DOCTYPE token's public
1491
+ // identifier.
1492
+ pos++;
1493
+ }
1494
+ break;
1495
+
1496
+ // https://html.spec.whatwg.org/multipage/parsing.html#doctype-public-identifier-(single-quoted)-state
1497
+ case STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
1498
+ // Consume the next input character:
1499
+ if (cc === CC_APOSTROPHE) {
1500
+ // U+0027 APOSTROPHE (')
1501
+ // Switch to the after DOCTYPE public identifier state.
1502
+ state = STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
1503
+ pos++;
1504
+ } else if (cc === 0x00) {
1505
+ // U+0000 NULL
1506
+ // This is an unexpected-null-character parse error. Append a U+FFFD
1507
+ // REPLACEMENT CHARACTER character to the current DOCTYPE token's public
1508
+ // identifier.
1509
+ pos++;
1510
+ } else if (cc === CC_GREATER_THAN) {
1511
+ // U+003E GREATER-THAN SIGN (>)
1512
+ // This is an abrupt-doctype-public-identifier parse error. Set the current
1513
+ // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1514
+ // the current DOCTYPE token.
1515
+ reportError(
1516
+ "abrupt-doctype-public-identifier",
1517
+ pos,
1518
+ pos + 1,
1519
+ "warning"
1520
+ );
1521
+ let nextPos = pos + 1;
1522
+ if (callbacks.doctype !== undefined) {
1523
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1524
+ }
1525
+ state = STATE_DATA;
1526
+ textStart = nextPos;
1527
+ pos = nextPos;
1528
+ } else {
1529
+ // Anything else
1530
+ // Append the current input character to the current DOCTYPE token's public
1531
+ // identifier.
1532
+ pos++;
1533
+ }
1534
+ break;
1535
+
1536
+ // https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-public-identifier-state
1537
+ case STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
1538
+ // Consume the next input character:
1539
+ if (isSpace(cc)) {
1540
+ // U+0009 CHARACTER TABULATION (tab)
1541
+ // U+000A LINE FEED (LF)
1542
+ // U+000C FORM FEED (FF)
1543
+ // U+0020 SPACE
1544
+ // Switch to the between DOCTYPE public and system identifiers state.
1545
+ state = STATE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;
1546
+ pos++;
1547
+ } else if (cc === CC_GREATER_THAN) {
1548
+ // U+003E GREATER-THAN SIGN (>)
1549
+ // Switch to the data state. Emit the current DOCTYPE token.
1550
+ let nextPos = pos + 1;
1551
+ if (callbacks.doctype !== undefined) {
1552
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1553
+ }
1554
+ state = STATE_DATA;
1555
+ textStart = nextPos;
1556
+ pos = nextPos;
1557
+ } else if (cc === CC_QUOTATION_MARK) {
1558
+ // U+0022 QUOTATION MARK (")
1559
+ // This is a missing-whitespace-between-doctype-public-and-system-identifiers
1560
+ // parse error. Set the current DOCTYPE token's system
1561
+ // identifier to the empty string (not missing), then switch
1562
+ // to the DOCTYPE system identifier (double-quoted) state.
1563
+ reportError(
1564
+ "missing-whitespace-between-doctype-public-and-system-identifiers",
1565
+ pos,
1566
+ pos + 1,
1567
+ "warning"
1568
+ );
1569
+ state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
1570
+ pos++;
1571
+ } else if (cc === CC_APOSTROPHE) {
1572
+ // U+0027 APOSTROPHE (')
1573
+ // This is a missing-whitespace-between-doctype-public-and-system-identifiers
1574
+ // parse error. Set the current DOCTYPE token's system
1575
+ // identifier to the empty string (not missing), then switch
1576
+ // to the DOCTYPE system identifier (single-quoted) state.
1577
+ reportError(
1578
+ "missing-whitespace-between-doctype-public-and-system-identifiers",
1579
+ pos,
1580
+ pos + 1,
1581
+ "warning"
1582
+ );
1583
+ state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
1584
+ pos++;
1585
+ } else {
1586
+ // Anything else
1587
+ // This is a missing-quote-before-doctype-system-identifier parse error. Set
1588
+ // the current DOCTYPE token's force-quirks flag to on. Reconsume in the
1589
+ // bogus DOCTYPE state.
1590
+ reportError(
1591
+ "missing-quote-before-doctype-system-identifier",
1592
+ pos,
1593
+ pos + 1,
1594
+ "warning"
1595
+ );
1596
+ state = STATE_BOGUS_DOCTYPE;
1597
+ }
1598
+ break;
1599
+
1600
+ // https://html.spec.whatwg.org/multipage/parsing.html#between-doctype-public-and-system-identifiers-state
1601
+ case STATE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
1602
+ // Consume the next input character:
1603
+ if (isSpace(cc)) {
1604
+ // U+0009 CHARACTER TABULATION (tab)
1605
+ // U+000A LINE FEED (LF)
1606
+ // U+000C FORM FEED (FF)
1607
+ // U+0020 SPACE
1608
+ // Ignore the character.
1609
+ pos++;
1610
+ } else if (cc === CC_GREATER_THAN) {
1611
+ // U+003E GREATER-THAN SIGN (>)
1612
+ // Switch to the data state. Emit the current DOCTYPE token.
1613
+ let nextPos = pos + 1;
1614
+ if (callbacks.doctype !== undefined) {
1615
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1616
+ }
1617
+ state = STATE_DATA;
1618
+ textStart = nextPos;
1619
+ pos = nextPos;
1620
+ } else if (cc === CC_QUOTATION_MARK) {
1621
+ // U+0022 QUOTATION MARK (")
1622
+ // Set the current DOCTYPE token's system identifier to the empty string
1623
+ // (not missing), then switch to the DOCTYPE system identifier
1624
+ // (double-quoted) state.
1625
+ state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
1626
+ pos++;
1627
+ } else if (cc === CC_APOSTROPHE) {
1628
+ // U+0027 APOSTROPHE (')
1629
+ // Set the current DOCTYPE token's system identifier to the empty string
1630
+ // (not missing), then switch to the DOCTYPE system identifier
1631
+ // (single-quoted) state.
1632
+ state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
1633
+ pos++;
1634
+ } else {
1635
+ // Anything else
1636
+ // This is a missing-quote-before-doctype-system-identifier parse error. Set
1637
+ // the current DOCTYPE token's force-quirks flag to on. Reconsume in the
1638
+ // bogus DOCTYPE state.
1639
+ reportError(
1640
+ "missing-quote-before-doctype-system-identifier",
1641
+ pos,
1642
+ pos + 1,
1643
+ "warning"
1644
+ );
1645
+ state = STATE_BOGUS_DOCTYPE;
1646
+ }
1647
+ break;
1648
+
1649
+ // https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-system-keyword-state
1650
+ case STATE_AFTER_DOCTYPE_SYSTEM_KEYWORD:
1651
+ // Consume the next input character:
1652
+ if (isSpace(cc)) {
1653
+ // U+0009 CHARACTER TABULATION (tab)
1654
+ // U+000A LINE FEED (LF)
1655
+ // U+000C FORM FEED (FF)
1656
+ // U+0020 SPACE
1657
+ // Switch to the before DOCTYPE system identifier state.
1658
+ state = STATE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;
1659
+ pos++;
1660
+ } else if (cc === CC_QUOTATION_MARK) {
1661
+ // U+0022 QUOTATION MARK (")
1662
+ // This is a missing-whitespace-after-doctype-system-keyword parse error.
1663
+ // Set the current DOCTYPE token's system identifier to the empty string
1664
+ // (not missing), then switch to the DOCTYPE system identifier
1665
+ // (double-quoted) state.
1666
+ reportError(
1667
+ "missing-whitespace-after-doctype-system-keyword",
1668
+ pos,
1669
+ pos + 1,
1670
+ "warning"
1671
+ );
1672
+ state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
1673
+ pos++;
1674
+ } else if (cc === CC_APOSTROPHE) {
1675
+ // U+0027 APOSTROPHE (')
1676
+ // This is a missing-whitespace-after-doctype-system-keyword parse error.
1677
+ // Set the current DOCTYPE token's system identifier to the empty string
1678
+ // (not missing), then switch to the DOCTYPE system identifier
1679
+ // (single-quoted) state.
1680
+ reportError(
1681
+ "missing-whitespace-after-doctype-system-keyword",
1682
+ pos,
1683
+ pos + 1,
1684
+ "warning"
1685
+ );
1686
+ state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
1687
+ pos++;
1688
+ } else if (cc === CC_GREATER_THAN) {
1689
+ // U+003E GREATER-THAN SIGN (>)
1690
+ // This is a missing-doctype-system-identifier parse error. Set the current
1691
+ // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1692
+ // the current DOCTYPE token.
1693
+ reportError(
1694
+ "missing-doctype-system-identifier",
1695
+ pos,
1696
+ pos + 1,
1697
+ "warning"
1698
+ );
1699
+ let nextPos = pos + 1;
1700
+ if (callbacks.doctype !== undefined) {
1701
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1702
+ }
1703
+ state = STATE_DATA;
1704
+ textStart = nextPos;
1705
+ pos = nextPos;
1706
+ } else {
1707
+ // Anything else
1708
+ // This is a missing-quote-before-doctype-system-identifier parse error. Set
1709
+ // the current DOCTYPE token's force-quirks flag to on. Reconsume in the
1710
+ // bogus DOCTYPE state.
1711
+ reportError(
1712
+ "missing-quote-before-doctype-system-identifier",
1713
+ pos,
1714
+ pos + 1,
1715
+ "warning"
1716
+ );
1717
+ state = STATE_BOGUS_DOCTYPE;
1718
+ }
1719
+ break;
1720
+
1721
+ // https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-system-identifier-state
1722
+ case STATE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
1723
+ // Consume the next input character:
1724
+ if (isSpace(cc)) {
1725
+ // U+0009 CHARACTER TABULATION (tab)
1726
+ // U+000A LINE FEED (LF)
1727
+ // U+000C FORM FEED (FF)
1728
+ // U+0020 SPACE
1729
+ // Ignore the character.
1730
+ pos++;
1731
+ } else if (cc === CC_QUOTATION_MARK) {
1732
+ // U+0022 QUOTATION MARK (")
1733
+ // Set the current DOCTYPE token's system identifier to the empty string
1734
+ // (not missing), then switch to the DOCTYPE system identifier
1735
+ // (double-quoted) state.
1736
+ state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
1737
+ pos++;
1738
+ } else if (cc === CC_APOSTROPHE) {
1739
+ // U+0027 APOSTROPHE (')
1740
+ // Set the current DOCTYPE token's system identifier to the empty string
1741
+ // (not missing), then switch to the DOCTYPE system identifier
1742
+ // (single-quoted) state.
1743
+ state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
1744
+ pos++;
1745
+ } else if (cc === CC_GREATER_THAN) {
1746
+ // U+003E GREATER-THAN SIGN (>)
1747
+ // This is a missing-doctype-system-identifier parse error. Set the current
1748
+ // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1749
+ // the current DOCTYPE token.
1750
+ reportError(
1751
+ "missing-doctype-system-identifier",
1752
+ pos,
1753
+ pos + 1,
1754
+ "warning"
1755
+ );
1756
+ let nextPos = pos + 1;
1757
+ if (callbacks.doctype !== undefined) {
1758
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1759
+ }
1760
+ state = STATE_DATA;
1761
+ textStart = nextPos;
1762
+ pos = nextPos;
1763
+ } else {
1764
+ // Anything else
1765
+ // This is a missing-quote-before-doctype-system-identifier parse error. Set
1766
+ // the current DOCTYPE token's force-quirks flag to on. Reconsume in the
1767
+ // bogus DOCTYPE state.
1768
+ reportError(
1769
+ "missing-quote-before-doctype-system-identifier",
1770
+ pos,
1771
+ pos + 1,
1772
+ "warning"
1773
+ );
1774
+ state = STATE_BOGUS_DOCTYPE;
1775
+ }
1776
+ break;
1777
+
1778
+ // https://html.spec.whatwg.org/multipage/parsing.html#doctype-system-identifier-(double-quoted)-state
1779
+ case STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
1780
+ // Consume the next input character:
1781
+ if (cc === CC_QUOTATION_MARK) {
1782
+ // U+0022 QUOTATION MARK (")
1783
+ // Switch to the after DOCTYPE system identifier state.
1784
+ state = STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
1785
+ pos++;
1786
+ } else if (cc === 0x00) {
1787
+ // U+0000 NULL
1788
+ // This is an unexpected-null-character parse error. Append a U+FFFD
1789
+ // REPLACEMENT CHARACTER character to the current DOCTYPE token's system
1790
+ // identifier.
1791
+ pos++;
1792
+ } else if (cc === CC_GREATER_THAN) {
1793
+ // U+003E GREATER-THAN SIGN (>)
1794
+ // This is an abrupt-doctype-system-identifier parse error. Set the current
1795
+ // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1796
+ // the current DOCTYPE token.
1797
+ reportError(
1798
+ "abrupt-doctype-system-identifier",
1799
+ pos,
1800
+ pos + 1,
1801
+ "warning"
1802
+ );
1803
+ let nextPos = pos + 1;
1804
+ if (callbacks.doctype !== undefined) {
1805
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1806
+ }
1807
+ state = STATE_DATA;
1808
+ textStart = nextPos;
1809
+ pos = nextPos;
1810
+ } else {
1811
+ // Anything else
1812
+ // Append the current input character to the current DOCTYPE token's system
1813
+ // identifier.
1814
+ pos++;
1815
+ }
1816
+ break;
1817
+
1818
+ // https://html.spec.whatwg.org/multipage/parsing.html#doctype-system-identifier-(single-quoted)-state
1819
+ case STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
1820
+ // Consume the next input character:
1821
+ if (cc === CC_APOSTROPHE) {
1822
+ // U+0027 APOSTROPHE (')
1823
+ // Switch to the after DOCTYPE system identifier state.
1824
+ state = STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
1825
+ pos++;
1826
+ } else if (cc === 0x00) {
1827
+ // U+0000 NULL
1828
+ // This is an unexpected-null-character parse error. Append a U+FFFD
1829
+ // REPLACEMENT CHARACTER character to the current DOCTYPE token's system
1830
+ // identifier.
1831
+ pos++;
1832
+ } else if (cc === CC_GREATER_THAN) {
1833
+ // U+003E GREATER-THAN SIGN (>)
1834
+ // This is an abrupt-doctype-system-identifier parse error. Set the current
1835
+ // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1836
+ // the current DOCTYPE token.
1837
+ reportError(
1838
+ "abrupt-doctype-system-identifier",
1839
+ pos,
1840
+ pos + 1,
1841
+ "warning"
1842
+ );
1843
+ let nextPos = pos + 1;
1844
+ if (callbacks.doctype !== undefined) {
1845
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1846
+ }
1847
+ state = STATE_DATA;
1848
+ textStart = nextPos;
1849
+ pos = nextPos;
1850
+ } else {
1851
+ // Anything else
1852
+ // Append the current input character to the current DOCTYPE token's system
1853
+ // identifier.
1854
+ pos++;
1855
+ }
1856
+ break;
1857
+
1858
+ // https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-system-identifier-state
1859
+ case STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
1860
+ // Consume the next input character:
1861
+ if (isSpace(cc)) {
1862
+ // U+0009 CHARACTER TABULATION (tab)
1863
+ // U+000A LINE FEED (LF)
1864
+ // U+000C FORM FEED (FF)
1865
+ // U+0020 SPACE
1866
+ // Ignore the character.
1867
+ pos++;
1868
+ } else if (cc === CC_GREATER_THAN) {
1869
+ // U+003E GREATER-THAN SIGN (>)
1870
+ // Switch to the data state. Emit the current DOCTYPE token.
1871
+ let nextPos = pos + 1;
1872
+ if (callbacks.doctype !== undefined) {
1873
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1874
+ }
1875
+ state = STATE_DATA;
1876
+ textStart = nextPos;
1877
+ pos = nextPos;
1878
+ } else {
1879
+ // Anything else
1880
+ // This is an unexpected-character-after-doctype-system-identifier parse
1881
+ // error. Reconsume in the bogus DOCTYPE state. (This does not set the
1882
+ // current DOCTYPE token's force-quirks flag to on.)
1883
+ reportError(
1884
+ "unexpected-character-after-doctype-system-identifier",
1885
+ pos,
1886
+ pos + 1,
1887
+ "warning"
1888
+ );
1889
+ state = STATE_BOGUS_DOCTYPE;
1890
+ }
1891
+ break;
1892
+
1893
+ // https://html.spec.whatwg.org/multipage/parsing.html#bogus-doctype-state
1894
+ case STATE_BOGUS_DOCTYPE:
1895
+ // Consume the next input character:
1896
+ if (cc === CC_GREATER_THAN) {
1897
+ // U+003E GREATER-THAN SIGN (>)
1898
+ // Switch to the data state. Emit the DOCTYPE token.
1899
+ let nextPos = pos + 1;
1900
+ if (callbacks.doctype !== undefined) {
1901
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1902
+ }
1903
+ state = STATE_DATA;
1904
+ textStart = nextPos;
1905
+ pos = nextPos;
1906
+ } else if (cc === 0x00) {
1907
+ // U+0000 NULL
1908
+ // This is an unexpected-null-character parse error. Ignore the character.
1909
+ pos++;
1910
+ } else {
1911
+ // Anything else
1912
+ // Ignore the character.
1913
+ pos++;
1914
+ }
1915
+ break;
1916
+
1917
+ // https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-state
1918
+ case STATE_CDATA_SECTION:
1919
+ // Consume the next input character:
1920
+ // U+005D RIGHT SQUARE BRACKET (])
1921
+ // Switch to the CDATA section bracket state.
1922
+ if (cc === CC_RIGHT_SQUARE_BRACKET) {
1923
+ state = STATE_CDATA_SECTION_BRACKET;
1924
+ pos++;
1925
+ } else {
1926
+ // Anything else
1927
+ // Emit the current input character as a character token.
1928
+ pos++;
1929
+ }
1930
+ break;
1931
+
1932
+ // https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-bracket-state
1933
+ case STATE_CDATA_SECTION_BRACKET:
1934
+ // Consume the next input character:
1935
+ // U+005D RIGHT SQUARE BRACKET (])
1936
+ // Switch to the CDATA section end state.
1937
+ if (cc === CC_RIGHT_SQUARE_BRACKET) {
1938
+ state = STATE_CDATA_SECTION_END;
1939
+ pos++;
1940
+ } else {
1941
+ // Anything else
1942
+ // Emit a U+005D RIGHT SQUARE BRACKET character token. Reconsume in the
1943
+ // CDATA section state.
1944
+ state = STATE_CDATA_SECTION;
1945
+ // Reconsume
1946
+ }
1947
+ break;
1948
+
1949
+ // https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-end-state
1950
+ case STATE_CDATA_SECTION_END:
1951
+ // Consume the next input character:
1952
+ // U+005D RIGHT SQUARE BRACKET (])
1953
+ // Emit a U+005D RIGHT SQUARE BRACKET character token.
1954
+ if (cc === CC_RIGHT_SQUARE_BRACKET) {
1955
+ pos++;
1956
+ } else if (cc === CC_GREATER_THAN) {
1957
+ // U+003E GREATER-THAN SIGN (>)
1958
+ // Switch to the data state.
1959
+ let nextPos = pos + 1;
1960
+ if (callbacks.comment !== undefined) {
1961
+ nextPos = callbacks.comment(input, commentStart, pos + 1);
1962
+ }
1963
+ state = STATE_DATA;
1964
+ textStart = nextPos;
1965
+ pos = nextPos;
1966
+ } else {
1967
+ // Anything else
1968
+ // Emit two U+005D RIGHT SQUARE BRACKET character tokens. Reconsume in the
1969
+ // CDATA section state.
1970
+ state = STATE_CDATA_SECTION;
1971
+ // Reconsume
1972
+ }
1973
+ break;
1974
+
1975
+ // https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state
1976
+ case STATE_RCDATA:
1977
+ // Consume the next input character:
1978
+ // U+003C LESS-THAN SIGN (<)
1979
+ // Switch to the RCDATA less-than sign state.
1980
+ if (cc === CC_LESS_THAN) {
1981
+ tagStart = pos;
1982
+ state = STATE_RCDATA_LESS_THAN_SIGN;
1983
+ pos++;
1984
+ } else {
1985
+ // Anything else
1986
+ // Emit the current input character as a character token.
1987
+ pos++;
1988
+ }
1989
+ break;
1990
+
1991
+ // https://html.spec.whatwg.org/multipage/parsing.html#rcdata-less-than-sign-state
1992
+ case STATE_RCDATA_LESS_THAN_SIGN:
1993
+ // Consume the next input character:
1994
+ // U+002F SOLIDUS (/)
1995
+ // Switch to the RCDATA end tag open state. (Spec sets a
1996
+ // temporary buffer here; we track the would-be content via
1997
+ // offset ranges instead.)
1998
+ if (cc === CC_SOLIDUS) {
1999
+ state = STATE_RCDATA_END_TAG_OPEN;
2000
+ pos++;
2001
+ } else {
2002
+ // Anything else
2003
+ // Emit a U+003C LESS-THAN SIGN character token. Reconsume in the RCDATA
2004
+ // state.
2005
+ state = STATE_RCDATA;
2006
+ // Reconsume
2007
+ }
2008
+ break;
2009
+
2010
+ // https://html.spec.whatwg.org/multipage/parsing.html#rcdata-end-tag-open-state
2011
+ case STATE_RCDATA_END_TAG_OPEN:
2012
+ // Consume the next input character:
2013
+ // ASCII alpha
2014
+ // Create a new end tag token, set its tag name to the empty string.
2015
+ // Reconsume in the RCDATA end tag name state.
2016
+ if (isAsciiAlpha(cc)) {
2017
+ tagNameStart = pos;
2018
+ state = STATE_RCDATA_END_TAG_NAME;
2019
+ // Reconsume
2020
+ } else {
2021
+ // Anything else
2022
+ // Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS
2023
+ // character token. Reconsume in the RCDATA state.
2024
+ state = STATE_RCDATA;
2025
+ // Reconsume
2026
+ }
2027
+ break;
2028
+
2029
+ // https://html.spec.whatwg.org/multipage/parsing.html#rcdata-end-tag-name-state
2030
+ case STATE_RCDATA_END_TAG_NAME:
2031
+ // Consume the next input character:
2032
+ // U+0009 CHARACTER TABULATION (tab)
2033
+ // U+000A LINE FEED (LF)
2034
+ // U+000C FORM FEED (FF)
2035
+ // U+0020 SPACE
2036
+ // If the current end tag token is an appropriate end tag token, then switch
2037
+ // to the before attribute name state. Otherwise, treat it as per the
2038
+ // "anything else" entry below.
2039
+ if (isSpace(cc)) {
2040
+ tagNameEnd = pos;
2041
+ if (
2042
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
2043
+ lastOpenTagName
2044
+ ) {
2045
+ flushText(tagStart);
2046
+ state = STATE_BEFORE_ATTRIBUTE_NAME;
2047
+ pos++;
2048
+ } else {
2049
+ state = STATE_RCDATA;
2050
+ // Reconsume
2051
+ }
2052
+ } else if (cc === CC_SOLIDUS) {
2053
+ // U+002F SOLIDUS (/)
2054
+ // If the current end tag token is an appropriate end tag token, then switch
2055
+ // to the self-closing start tag state. Otherwise, treat it as per the
2056
+ // "anything else" entry below.
2057
+ tagNameEnd = pos;
2058
+ if (
2059
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
2060
+ lastOpenTagName
2061
+ ) {
2062
+ flushText(tagStart);
2063
+ state = STATE_SELF_CLOSING_START_TAG;
2064
+ pos++;
2065
+ } else {
2066
+ state = STATE_RCDATA;
2067
+ // Reconsume
2068
+ }
2069
+ } else if (cc === CC_GREATER_THAN) {
2070
+ // U+003E GREATER-THAN SIGN (>)
2071
+ // If the current end tag token is an appropriate end tag token, then switch
2072
+ // to the data state and emit the current tag token. Otherwise, treat it as
2073
+ // per the "anything else" entry below.
2074
+ tagNameEnd = pos;
2075
+ if (
2076
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
2077
+ lastOpenTagName
2078
+ ) {
2079
+ flushText(tagStart);
2080
+ state = STATE_DATA;
2081
+ pos = emitCloseTag(pos + 1);
2082
+ } else {
2083
+ state = STATE_RCDATA;
2084
+ // Reconsume
2085
+ }
2086
+ } else if (isAsciiAlpha(cc)) {
2087
+ // ASCII upper alpha / ASCII lower alpha
2088
+ // Append the lowercase version of the current input character to the
2089
+ // current tag token's tag name. Append the current input character to
2090
+ // the temporary buffer.
2091
+ pos++;
2092
+ } else {
2093
+ // Anything else
2094
+ // Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
2095
+ // token, and a character token for each of the characters in the temporary
2096
+ // buffer (in the order they were added to the buffer). Reconsume in the
2097
+ // RCDATA state.
2098
+ state = STATE_RCDATA;
2099
+ // Reconsume
2100
+ }
2101
+ break;
2102
+
2103
+ // https://html.spec.whatwg.org/multipage/parsing.html#rawtext-state
2104
+ case STATE_RAWTEXT:
2105
+ // Consume the next input character:
2106
+ // U+003C LESS-THAN SIGN (<)
2107
+ // Switch to the RAWTEXT less-than sign state.
2108
+ if (cc === CC_LESS_THAN) {
2109
+ tagStart = pos;
2110
+ state = STATE_RAWTEXT_LESS_THAN_SIGN;
2111
+ pos++;
2112
+ } else {
2113
+ pos++;
2114
+ }
2115
+ break;
2116
+
2117
+ // https://html.spec.whatwg.org/multipage/parsing.html#rawtext-less-than-sign-state
2118
+ case STATE_RAWTEXT_LESS_THAN_SIGN:
2119
+ // Consume the next input character:
2120
+ // U+002F SOLIDUS (/)
2121
+ // Switch to the RAWTEXT end tag open state. (Spec sets a
2122
+ // temporary buffer here; we track via offset ranges instead.)
2123
+ if (cc === CC_SOLIDUS) {
2124
+ state = STATE_RAWTEXT_END_TAG_OPEN;
2125
+ pos++;
2126
+ } else {
2127
+ // Anything else
2128
+ // Emit a U+003C LESS-THAN SIGN character token. Reconsume in the RAWTEXT
2129
+ // state.
2130
+ state = STATE_RAWTEXT;
2131
+ // Reconsume
2132
+ }
2133
+ break;
2134
+
2135
+ // https://html.spec.whatwg.org/multipage/parsing.html#rawtext-end-tag-open-state
2136
+ case STATE_RAWTEXT_END_TAG_OPEN:
2137
+ // Consume the next input character:
2138
+ // ASCII alpha
2139
+ // Create a new end tag token, set its tag name to the empty string.
2140
+ // Reconsume in the RAWTEXT end tag name state.
2141
+ if (isAsciiAlpha(cc)) {
2142
+ tagNameStart = pos;
2143
+ state = STATE_RAWTEXT_END_TAG_NAME;
2144
+ // Reconsume
2145
+ } else {
2146
+ // Anything else
2147
+ // Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS
2148
+ // character token. Reconsume in the RAWTEXT state.
2149
+ state = STATE_RAWTEXT;
2150
+ // Reconsume
2151
+ }
2152
+ break;
2153
+
2154
+ // https://html.spec.whatwg.org/multipage/parsing.html#rawtext-end-tag-name-state
2155
+ case STATE_RAWTEXT_END_TAG_NAME:
2156
+ // Consume the next input character:
2157
+ // U+0009 CHARACTER TABULATION (tab)
2158
+ // U+000A LINE FEED (LF)
2159
+ // U+000C FORM FEED (FF)
2160
+ // U+0020 SPACE
2161
+ // If the current end tag token is an appropriate end tag token, then switch
2162
+ // to the before attribute name state. Otherwise, treat it as per the
2163
+ // "anything else" entry below.
2164
+ if (isSpace(cc)) {
2165
+ tagNameEnd = pos;
2166
+ if (
2167
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
2168
+ lastOpenTagName
2169
+ ) {
2170
+ flushText(tagStart);
2171
+ state = STATE_BEFORE_ATTRIBUTE_NAME;
2172
+ pos++;
2173
+ } else {
2174
+ state = STATE_RAWTEXT;
2175
+ }
2176
+ } else if (cc === CC_SOLIDUS) {
2177
+ // U+002F SOLIDUS (/)
2178
+ // If the current end tag token is an appropriate end tag token, then switch
2179
+ // to the self-closing start tag state. Otherwise, treat it as per the
2180
+ // "anything else" entry below.
2181
+ tagNameEnd = pos;
2182
+ if (
2183
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
2184
+ lastOpenTagName
2185
+ ) {
2186
+ flushText(tagStart);
2187
+ state = STATE_SELF_CLOSING_START_TAG;
2188
+ pos++;
2189
+ } else {
2190
+ state = STATE_RAWTEXT;
2191
+ }
2192
+ } else if (cc === CC_GREATER_THAN) {
2193
+ // U+003E GREATER-THAN SIGN (>)
2194
+ // If the current end tag token is an appropriate end tag token, then switch
2195
+ // to the data state and emit the current tag token. Otherwise, treat it as
2196
+ // per the "anything else" entry below.
2197
+ tagNameEnd = pos;
2198
+ if (
2199
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
2200
+ lastOpenTagName
2201
+ ) {
2202
+ flushText(tagStart);
2203
+ state = STATE_DATA;
2204
+ pos = emitCloseTag(pos + 1);
2205
+ } else {
2206
+ state = STATE_RAWTEXT;
2207
+ }
2208
+ } else if (isAsciiAlpha(cc)) {
2209
+ // ASCII upper alpha / ASCII lower alpha
2210
+ // Append the lowercase version of the current input character to the
2211
+ // current tag token's tag name. Append the current input character to
2212
+ // the temporary buffer.
2213
+ pos++;
2214
+ } else {
2215
+ // Anything else
2216
+ // Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
2217
+ // token, and a character token for each of the characters in the temporary
2218
+ // buffer (in the order they were added to the buffer). Reconsume in the
2219
+ // RAWTEXT state.
2220
+ state = STATE_RAWTEXT;
2221
+ // Reconsume
2222
+ }
2223
+ break;
2224
+
2225
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-state
2226
+ case STATE_SCRIPT_DATA:
2227
+ // Consume the next input character:
2228
+ // U+003C LESS-THAN SIGN (<)
2229
+ // Switch to the script data less-than sign state.
2230
+ if (cc === CC_LESS_THAN) {
2231
+ tagStart = pos;
2232
+ state = STATE_SCRIPT_DATA_LESS_THAN_SIGN;
2233
+ pos++;
2234
+ } else {
2235
+ pos++;
2236
+ }
2237
+ break;
2238
+
2239
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-less-than-sign-state
2240
+ case STATE_SCRIPT_DATA_LESS_THAN_SIGN:
2241
+ // Consume the next input character:
2242
+ // U+002F SOLIDUS (/)
2243
+ // Switch to the script data end tag open state. (Spec sets a
2244
+ // temporary buffer here; we track via offset ranges instead.)
2245
+ if (cc === CC_SOLIDUS) {
2246
+ state = STATE_SCRIPT_DATA_END_TAG_OPEN;
2247
+ pos++;
2248
+ } else if (cc === CC_EXCLAMATION_MARK) {
2249
+ // U+0021 EXCLAMATION MARK (!)
2250
+ // Switch to the script data escape start state. Emit a U+003C LESS-THAN
2251
+ // SIGN character token and a U+0021 EXCLAMATION MARK character token.
2252
+ state = STATE_SCRIPT_DATA_ESCAPE_START;
2253
+ pos++;
2254
+ } else {
2255
+ // Anything else
2256
+ // Emit a U+003C LESS-THAN SIGN character token. Reconsume in the script
2257
+ // data state.
2258
+ state = STATE_SCRIPT_DATA;
2259
+ // Reconsume
2260
+ }
2261
+ break;
2262
+
2263
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-end-tag-open-state
2264
+ case STATE_SCRIPT_DATA_END_TAG_OPEN:
2265
+ // Consume the next input character:
2266
+ // ASCII alpha
2267
+ // Create a new end tag token, set its tag name to the empty string.
2268
+ // Reconsume in the script data end tag name state.
2269
+ if (isAsciiAlpha(cc)) {
2270
+ tagNameStart = pos;
2271
+ state = STATE_SCRIPT_DATA_END_TAG_NAME;
2272
+ // Reconsume
2273
+ } else {
2274
+ // Anything else
2275
+ // Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS
2276
+ // character token. Reconsume in the script data state.
2277
+ state = STATE_SCRIPT_DATA;
2278
+ // Reconsume
2279
+ }
2280
+ break;
2281
+
2282
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-end-tag-name-state
2283
+ case STATE_SCRIPT_DATA_END_TAG_NAME:
2284
+ // Consume the next input character:
2285
+ // U+0009 CHARACTER TABULATION (tab)
2286
+ // U+000A LINE FEED (LF)
2287
+ // U+000C FORM FEED (FF)
2288
+ // U+0020 SPACE
2289
+ // If the current end tag token is an appropriate end tag token, then switch
2290
+ // to the before attribute name state. Otherwise, treat it as per the
2291
+ // "anything else" entry below.
2292
+ if (isSpace(cc)) {
2293
+ tagNameEnd = pos;
2294
+ if (
2295
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
2296
+ lastOpenTagName
2297
+ ) {
2298
+ flushText(tagStart);
2299
+ state = STATE_BEFORE_ATTRIBUTE_NAME;
2300
+ pos++;
2301
+ } else {
2302
+ state = STATE_SCRIPT_DATA;
2303
+ }
2304
+ } else if (cc === CC_SOLIDUS) {
2305
+ // U+002F SOLIDUS (/)
2306
+ // If the current end tag token is an appropriate end tag token, then switch
2307
+ // to the self-closing start tag state. Otherwise, treat it as per the
2308
+ // "anything else" entry below.
2309
+ tagNameEnd = pos;
2310
+ if (
2311
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
2312
+ lastOpenTagName
2313
+ ) {
2314
+ flushText(tagStart);
2315
+ state = STATE_SELF_CLOSING_START_TAG;
2316
+ pos++;
2317
+ } else {
2318
+ state = STATE_SCRIPT_DATA;
2319
+ }
2320
+ } else if (cc === CC_GREATER_THAN) {
2321
+ // U+003E GREATER-THAN SIGN (>)
2322
+ // If the current end tag token is an appropriate end tag token, then switch
2323
+ // to the data state and emit the current tag token. Otherwise, treat it as
2324
+ // per the "anything else" entry below.
2325
+ tagNameEnd = pos;
2326
+ if (
2327
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
2328
+ lastOpenTagName
2329
+ ) {
2330
+ flushText(tagStart);
2331
+ state = STATE_DATA;
2332
+ pos = emitCloseTag(pos + 1);
2333
+ } else {
2334
+ state = STATE_SCRIPT_DATA;
2335
+ }
2336
+ } else if (isAsciiAlpha(cc)) {
2337
+ // ASCII upper alpha / ASCII lower alpha
2338
+ pos++;
2339
+ } else {
2340
+ // Anything else
2341
+ // Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
2342
+ // token, and a character token for each of the characters in the temporary
2343
+ // buffer (in the order they were added to the buffer). Reconsume in the
2344
+ // script data state.
2345
+ state = STATE_SCRIPT_DATA;
2346
+ // Reconsume
2347
+ }
2348
+ break;
2349
+
2350
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escape-start-state
2351
+ case STATE_SCRIPT_DATA_ESCAPE_START:
2352
+ // Consume the next input character:
2353
+ // U+002D HYPHEN-MINUS (-)
2354
+ // Switch to the script data escape start dash state. Emit a U+002D
2355
+ // HYPHEN-MINUS character token.
2356
+ if (cc === CC_HYPHEN_MINUS) {
2357
+ state = STATE_SCRIPT_DATA_ESCAPE_START_DASH;
2358
+ pos++;
2359
+ } else {
2360
+ // Anything else
2361
+ // Reconsume in the script data state.
2362
+ state = STATE_SCRIPT_DATA;
2363
+ // Reconsume
2364
+ }
2365
+ break;
2366
+
2367
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escape-start-dash-state
2368
+ case STATE_SCRIPT_DATA_ESCAPE_START_DASH:
2369
+ // Consume the next input character:
2370
+ // U+002D HYPHEN-MINUS (-)
2371
+ // Switch to the script data escaped dash dash state. Emit a U+002D
2372
+ // HYPHEN-MINUS character token.
2373
+ if (cc === CC_HYPHEN_MINUS) {
2374
+ state = STATE_SCRIPT_DATA_ESCAPED_DASH_DASH;
2375
+ pos++;
2376
+ } else {
2377
+ // Anything else
2378
+ // Reconsume in the script data state.
2379
+ state = STATE_SCRIPT_DATA;
2380
+ // Reconsume
2381
+ }
2382
+ break;
2383
+
2384
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-state
2385
+ case STATE_SCRIPT_DATA_ESCAPED:
2386
+ // Consume the next input character:
2387
+ // U+002D HYPHEN-MINUS (-)
2388
+ // Switch to the script data escaped dash state. Emit a U+002D HYPHEN-MINUS
2389
+ // character token.
2390
+ if (cc === CC_HYPHEN_MINUS) {
2391
+ state = STATE_SCRIPT_DATA_ESCAPED_DASH;
2392
+ pos++;
2393
+ } else if (cc === CC_LESS_THAN) {
2394
+ // U+003C LESS-THAN SIGN (<)
2395
+ // Switch to the script data escaped less-than sign state.
2396
+ tagStart = pos;
2397
+ state = STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
2398
+ pos++;
2399
+ } else {
2400
+ // Anything else
2401
+ // Emit the current input character as a character token.
2402
+ pos++;
2403
+ }
2404
+ break;
2405
+
2406
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-dash-state
2407
+ case STATE_SCRIPT_DATA_ESCAPED_DASH:
2408
+ // Consume the next input character:
2409
+ // U+002D HYPHEN-MINUS (-)
2410
+ // Switch to the script data escaped dash dash state. Emit a U+002D
2411
+ // HYPHEN-MINUS character token.
2412
+ if (cc === CC_HYPHEN_MINUS) {
2413
+ state = STATE_SCRIPT_DATA_ESCAPED_DASH_DASH;
2414
+ pos++;
2415
+ } else if (cc === CC_LESS_THAN) {
2416
+ // U+003C LESS-THAN SIGN (<)
2417
+ // Switch to the script data escaped less-than sign state.
2418
+ tagStart = pos;
2419
+ state = STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
2420
+ pos++;
2421
+ } else {
2422
+ // Anything else
2423
+ // Switch to the script data escaped state. Emit the current input character
2424
+ // as a character token.
2425
+ state = STATE_SCRIPT_DATA_ESCAPED;
2426
+ pos++;
2427
+ }
2428
+ break;
2429
+
2430
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-dash-dash-state
2431
+ case STATE_SCRIPT_DATA_ESCAPED_DASH_DASH:
2432
+ // Consume the next input character:
2433
+ // U+002D HYPHEN-MINUS (-)
2434
+ // Emit a U+002D HYPHEN-MINUS character token.
2435
+ if (cc === CC_HYPHEN_MINUS) {
2436
+ pos++;
2437
+ } else if (cc === CC_LESS_THAN) {
2438
+ // U+003C LESS-THAN SIGN (<)
2439
+ // Switch to the script data escaped less-than sign state.
2440
+ tagStart = pos;
2441
+ state = STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
2442
+ pos++;
2443
+ } else if (cc === CC_GREATER_THAN) {
2444
+ // U+003E GREATER-THAN SIGN (>)
2445
+ // Switch to the script data state. Emit a U+003E GREATER-THAN SIGN
2446
+ // character token.
2447
+ state = STATE_SCRIPT_DATA;
2448
+ pos++;
2449
+ } else {
2450
+ // Anything else
2451
+ // Switch to the script data escaped state. Emit the current input character
2452
+ // as a character token.
2453
+ state = STATE_SCRIPT_DATA_ESCAPED;
2454
+ pos++;
2455
+ }
2456
+ break;
2457
+
2458
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-less-than-sign-state
2459
+ case STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
2460
+ // Consume the next input character:
2461
+ // U+002F SOLIDUS (/)
2462
+ // Switch to the script data escaped end tag open state.
2463
+ // (Spec sets a temporary buffer; we track via offset ranges.)
2464
+ if (cc === CC_SOLIDUS) {
2465
+ state = STATE_SCRIPT_DATA_ESCAPED_END_TAG_OPEN;
2466
+ pos++;
2467
+ } else if (isAsciiAlpha(cc)) {
2468
+ // ASCII alpha
2469
+ // Set the temporary buffer to the empty string. Emit a U+003C LESS-THAN
2470
+ // SIGN character token. Reconsume in the script data double escape start
2471
+ // state.
2472
+ scriptMatch = 0;
2473
+ state = STATE_SCRIPT_DATA_DOUBLE_ESCAPE_START;
2474
+ // Reconsume
2475
+ } else {
2476
+ // Anything else
2477
+ // Emit a U+003C LESS-THAN SIGN character token. Reconsume in the script
2478
+ // data escaped state.
2479
+ state = STATE_SCRIPT_DATA_ESCAPED;
2480
+ // Reconsume
2481
+ }
2482
+ break;
2483
+
2484
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-end-tag-open-state
2485
+ case STATE_SCRIPT_DATA_ESCAPED_END_TAG_OPEN:
2486
+ // Consume the next input character:
2487
+ // ASCII alpha
2488
+ // Create a new end tag token, set its tag name to the empty string.
2489
+ // Reconsume in the script data escaped end tag name state.
2490
+ if (isAsciiAlpha(cc)) {
2491
+ tagNameStart = pos;
2492
+ state = STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME;
2493
+ // Reconsume
2494
+ } else {
2495
+ // Anything else
2496
+ // Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS
2497
+ // character token. Reconsume in the script data escaped state.
2498
+ state = STATE_SCRIPT_DATA_ESCAPED;
2499
+ // Reconsume
2500
+ }
2501
+ break;
2502
+
2503
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-end-tag-name-state
2504
+ case STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME:
2505
+ // Consume the next input character:
2506
+ // U+0009 CHARACTER TABULATION (tab)
2507
+ // U+000A LINE FEED (LF)
2508
+ // U+000C FORM FEED (FF)
2509
+ // U+0020 SPACE
2510
+ // If the current end tag token is an appropriate end tag token, then switch
2511
+ // to the before attribute name state. Otherwise, treat it as per the
2512
+ // "anything else" entry below.
2513
+ if (isSpace(cc)) {
2514
+ tagNameEnd = pos;
2515
+ if (
2516
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
2517
+ lastOpenTagName
2518
+ ) {
2519
+ flushText(tagStart);
2520
+ state = STATE_BEFORE_ATTRIBUTE_NAME;
2521
+ pos++;
2522
+ } else {
2523
+ state = STATE_SCRIPT_DATA_ESCAPED;
2524
+ }
2525
+ } else if (cc === CC_SOLIDUS) {
2526
+ // U+002F SOLIDUS (/)
2527
+ // If the current end tag token is an appropriate end tag token, then switch
2528
+ // to the self-closing start tag state. Otherwise, treat it as per the
2529
+ // "anything else" entry below.
2530
+ tagNameEnd = pos;
2531
+ if (
2532
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
2533
+ lastOpenTagName
2534
+ ) {
2535
+ flushText(tagStart);
2536
+ state = STATE_SELF_CLOSING_START_TAG;
2537
+ pos++;
2538
+ } else {
2539
+ state = STATE_SCRIPT_DATA_ESCAPED;
2540
+ }
2541
+ } else if (cc === CC_GREATER_THAN) {
2542
+ // U+003E GREATER-THAN SIGN (>)
2543
+ // If the current end tag token is an appropriate end tag token, then switch
2544
+ // to the data state and emit the current tag token. Otherwise, treat it as
2545
+ // per the "anything else" entry below.
2546
+ tagNameEnd = pos;
2547
+ if (
2548
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
2549
+ lastOpenTagName
2550
+ ) {
2551
+ flushText(tagStart);
2552
+ state = STATE_DATA;
2553
+ pos = emitCloseTag(pos + 1);
2554
+ } else {
2555
+ state = STATE_SCRIPT_DATA_ESCAPED;
2556
+ }
2557
+ } else if (isAsciiAlpha(cc)) {
2558
+ // ASCII upper alpha / ASCII lower alpha
2559
+ pos++;
2560
+ } else {
2561
+ // Anything else
2562
+ // Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
2563
+ // token, and a character token for each of the characters in the temporary
2564
+ // buffer (in the order they were added to the buffer). Reconsume in the
2565
+ // script data escaped state.
2566
+ state = STATE_SCRIPT_DATA_ESCAPED;
2567
+ // Reconsume
2568
+ }
2569
+ break;
2570
+
2571
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-start-state
2572
+ case STATE_SCRIPT_DATA_DOUBLE_ESCAPE_START:
2573
+ // Consume the next input character:
2574
+ // U+0009 CHARACTER TABULATION (tab)
2575
+ // U+000A LINE FEED (LF)
2576
+ // U+000C FORM FEED (FF)
2577
+ // U+0020 SPACE
2578
+ // U+002F SOLIDUS (/)
2579
+ // U+003E GREATER-THAN SIGN (>)
2580
+ // If the temporary buffer is the string "script", then switch to the script
2581
+ // data double escaped state. Otherwise, switch to the script data escaped
2582
+ // state. Emit the current input character as a character token.
2583
+ if (isSpace(cc) || cc === CC_SOLIDUS || cc === CC_GREATER_THAN) {
2584
+ state =
2585
+ scriptMatch === 6
2586
+ ? STATE_SCRIPT_DATA_DOUBLE_ESCAPED
2587
+ : STATE_SCRIPT_DATA_ESCAPED;
2588
+ pos++;
2589
+ } else if (isAsciiUpperAlpha(cc) || isAsciiLowerAlpha(cc)) {
2590
+ // ASCII alpha — advance the `"script"` match counter if the
2591
+ // lowercase form matches the next expected char, otherwise
2592
+ // snap to the sentinel so further chars can't revive a
2593
+ // match. No buffer allocation.
2594
+ const lower = isAsciiUpperAlpha(cc) ? cc + 0x20 : cc;
2595
+ if (scriptMatch < 6 && lower === "script".charCodeAt(scriptMatch)) {
2596
+ scriptMatch++;
2597
+ } else {
2598
+ scriptMatch = 7;
2599
+ }
2600
+ pos++;
2601
+ } else {
2602
+ // Anything else
2603
+ // Reconsume in the script data escaped state.
2604
+ state = STATE_SCRIPT_DATA_ESCAPED;
2605
+ // Reconsume
2606
+ }
2607
+ break;
2608
+
2609
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-state
2610
+ case STATE_SCRIPT_DATA_DOUBLE_ESCAPED:
2611
+ // Consume the next input character:
2612
+ // U+002D HYPHEN-MINUS (-)
2613
+ // Switch to the script data double escaped dash state. Emit a U+002D
2614
+ // HYPHEN-MINUS character token.
2615
+ if (cc === CC_HYPHEN_MINUS) {
2616
+ state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH;
2617
+ pos++;
2618
+ } else if (cc === CC_LESS_THAN) {
2619
+ // U+003C LESS-THAN SIGN (<)
2620
+ // Switch to the script data double escaped less-than sign state. Emit a
2621
+ // U+003C LESS-THAN SIGN character token.
2622
+ state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
2623
+ pos++;
2624
+ } else {
2625
+ // Anything else
2626
+ // Emit the current input character as a character token.
2627
+ pos++;
2628
+ }
2629
+ break;
2630
+
2631
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-dash-state
2632
+ case STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
2633
+ // Consume the next input character:
2634
+ // U+002D HYPHEN-MINUS (-)
2635
+ // Switch to the script data double escaped dash dash state. Emit a U+002D
2636
+ // HYPHEN-MINUS character token.
2637
+ if (cc === CC_HYPHEN_MINUS) {
2638
+ state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH;
2639
+ pos++;
2640
+ } else if (cc === CC_LESS_THAN) {
2641
+ // U+003C LESS-THAN SIGN (<)
2642
+ // Switch to the script data double escaped less-than sign state. Emit a
2643
+ // U+003C LESS-THAN SIGN character token.
2644
+ state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
2645
+ pos++;
2646
+ } else {
2647
+ // Anything else
2648
+ // Switch to the script data double escaped state. Emit the current input
2649
+ // character as a character token.
2650
+ state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED;
2651
+ pos++;
2652
+ }
2653
+ break;
2654
+
2655
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-dash-dash-state
2656
+ case STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
2657
+ // Consume the next input character:
2658
+ // U+002D HYPHEN-MINUS (-)
2659
+ // Emit a U+002D HYPHEN-MINUS character token.
2660
+ if (cc === CC_HYPHEN_MINUS) {
2661
+ pos++;
2662
+ } else if (cc === CC_LESS_THAN) {
2663
+ // U+003C LESS-THAN SIGN (<)
2664
+ // Switch to the script data double escaped less-than sign state. Emit a
2665
+ // U+003C LESS-THAN SIGN character token.
2666
+ state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
2667
+ pos++;
2668
+ } else if (cc === CC_GREATER_THAN) {
2669
+ // U+003E GREATER-THAN SIGN (>)
2670
+ // Switch to the script data state. Emit a U+003E GREATER-THAN SIGN
2671
+ // character token.
2672
+ state = STATE_SCRIPT_DATA;
2673
+ pos++;
2674
+ } else {
2675
+ // Anything else
2676
+ // Switch to the script data double escaped state. Emit the current input
2677
+ // character as a character token.
2678
+ state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED;
2679
+ pos++;
2680
+ }
2681
+ break;
2682
+
2683
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-less-than-sign-state
2684
+ case STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
2685
+ // Consume the next input character:
2686
+ // U+002F SOLIDUS (/)
2687
+ // Set the temporary buffer to the empty string. Switch to the script data
2688
+ // double escape end state. Emit a U+002F SOLIDUS character token.
2689
+ if (cc === CC_SOLIDUS) {
2690
+ scriptMatch = 0;
2691
+ state = STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END;
2692
+ pos++;
2693
+ } else {
2694
+ // Anything else
2695
+ // Reconsume in the script data double escaped state.
2696
+ state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED;
2697
+ // Reconsume
2698
+ }
2699
+ break;
2700
+
2701
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-end-state
2702
+ case STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END:
2703
+ // Consume the next input character:
2704
+ // U+0009 CHARACTER TABULATION (tab)
2705
+ // U+000A LINE FEED (LF)
2706
+ // U+000C FORM FEED (FF)
2707
+ // U+0020 SPACE
2708
+ // U+002F SOLIDUS (/)
2709
+ // U+003E GREATER-THAN SIGN (>)
2710
+ // If the temporary buffer is the string "script", then switch to the script
2711
+ // data escaped state. Otherwise, switch to the script data double escaped
2712
+ // state. Emit the current input character as a character token.
2713
+ if (isSpace(cc) || cc === CC_SOLIDUS || cc === CC_GREATER_THAN) {
2714
+ state =
2715
+ scriptMatch === 6
2716
+ ? STATE_SCRIPT_DATA_ESCAPED
2717
+ : STATE_SCRIPT_DATA_DOUBLE_ESCAPED;
2718
+ pos++;
2719
+ } else if (isAsciiUpperAlpha(cc) || isAsciiLowerAlpha(cc)) {
2720
+ // ASCII alpha — advance the `"script"` match counter if the
2721
+ // lowercase form matches the next expected char, otherwise
2722
+ // snap to the sentinel so further chars can't revive a
2723
+ // match. No buffer allocation.
2724
+ const lower = isAsciiUpperAlpha(cc) ? cc + 0x20 : cc;
2725
+ if (scriptMatch < 6 && lower === "script".charCodeAt(scriptMatch)) {
2726
+ scriptMatch++;
2727
+ } else {
2728
+ scriptMatch = 7;
2729
+ }
2730
+ pos++;
2731
+ } else {
2732
+ // Anything else
2733
+ // Reconsume in the script data double escaped state.
2734
+ state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED;
2735
+ // Reconsume
2736
+ }
2737
+ break;
2738
+
2739
+ // https://html.spec.whatwg.org/multipage/parsing.html#plaintext-state
2740
+ case STATE_PLAINTEXT:
2741
+ // Consume the next input character:
2742
+ // Anything else
2743
+ // Emit the current input character as a character token.
2744
+ pos++;
2745
+ break;
2746
+
2747
+ // https://html.spec.whatwg.org/multipage/parsing.html#character-reference-state
2748
+ case STATE_CHARACTER_REFERENCE:
2749
+ // Set the temporary buffer to the empty string. Append a U+0026
2750
+ // AMPERSAND (&) character to the temporary buffer.
2751
+ // Consume the next input character:
2752
+ if (isAsciiAlphanumeric(cc)) {
2753
+ // ASCII alphanumeric
2754
+ // Reconsume in the named character reference state.
2755
+ state = STATE_NAMED_CHARACTER_REFERENCE;
2756
+ // Reconsume
2757
+ } else if (cc === CC_NUMBER_SIGN) {
2758
+ // U+0023 NUMBER SIGN (#)
2759
+ // Append the current input character to the temporary buffer.
2760
+ // Switch to the numeric character reference state.
2761
+ state = STATE_NUMERIC_CHARACTER_REFERENCE;
2762
+ pos++;
2763
+ } else {
2764
+ // Anything else
2765
+ // Flush code points consumed as a character reference.
2766
+ // Reconsume in the return state.
2767
+ state = returnState;
2768
+ // Reconsume
2769
+ }
2770
+ break;
2771
+
2772
+ // https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
2773
+ case STATE_NAMED_CHARACTER_REFERENCE: {
2774
+ // Consume the maximum number of characters possible where the
2775
+ // consumed characters are one of the identifiers in the first
2776
+ // column of the named character references table.
2777
+ //
2778
+ // We measure the longest run of ASCII alphanumeric characters
2779
+ // (capped at MAX_ENTITY_NAME_LEN - 1 since the optional `;` is
2780
+ // handled separately), then walk that run from longest to
2781
+ // shortest looking for the first prefix that exists in the
2782
+ // entity table (with a trailing `;` if present, otherwise the
2783
+ // legacy bare form).
2784
+ let runLen = 0;
2785
+ while (
2786
+ pos + runLen < len &&
2787
+ isAsciiAlphanumeric(input.charCodeAt(pos + runLen)) &&
2788
+ runLen < MAX_ENTITY_NAME_LEN - 1
2789
+ ) {
2790
+ runLen++;
2791
+ }
2792
+ const hasSemicolon =
2793
+ pos + runLen < len && input.charCodeAt(pos + runLen) === CC_SEMICOLON;
2794
+ namedEntityConsumed = 0;
2795
+ for (let n = runLen; n > 0; n--) {
2796
+ // Try with trailing `;` first if one is present after the run.
2797
+ if (n === runLen && hasSemicolon) {
2798
+ const withSemi = `${input.slice(pos, pos + n)};`;
2799
+ if (HTML_ENTITIES[withSemi] !== undefined) {
2800
+ namedEntityConsumed = n + 1;
2801
+ break;
2802
+ }
2803
+ }
2804
+ const bare = input.slice(pos, pos + n);
2805
+ if (HTML_ENTITIES[bare] !== undefined) {
2806
+ namedEntityConsumed = n;
2807
+ break;
2808
+ }
2809
+ }
2810
+ if (namedEntityConsumed > 0) {
2811
+ pos += namedEntityConsumed;
2812
+ state = returnState;
2813
+ } else {
2814
+ // No match — flush code points consumed as a character
2815
+ // reference. Switch to the ambiguous ampersand state.
2816
+ state = STATE_AMBIGUOUS_AMPERSAND;
2817
+ }
2818
+ break;
2819
+ }
2820
+
2821
+ // https://html.spec.whatwg.org/multipage/parsing.html#ambiguous-ampersand-state
2822
+ case STATE_AMBIGUOUS_AMPERSAND:
2823
+ // Consume the next input character:
2824
+ if (isAsciiAlphanumeric(cc)) {
2825
+ // ASCII alphanumeric
2826
+ // If the character reference was consumed as part of an
2827
+ // attribute, then append the current input character to the
2828
+ // current attribute's value. Otherwise, emit the current
2829
+ // input character as a character token.
2830
+ pos++;
2831
+ } else if (cc === CC_SEMICOLON) {
2832
+ // U+003B SEMICOLON (;)
2833
+ // This is an unknown-named-character-reference parse error.
2834
+ // Reconsume in the return state.
2835
+ reportError(
2836
+ "unknown-named-character-reference",
2837
+ pos,
2838
+ pos + 1,
2839
+ "warning"
2840
+ );
2841
+ state = returnState;
2842
+ // Reconsume
2843
+ } else {
2844
+ // Anything else
2845
+ // Reconsume in the return state.
2846
+ state = returnState;
2847
+ // Reconsume
2848
+ }
2849
+ break;
2850
+
2851
+ // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-state
2852
+ case STATE_NUMERIC_CHARACTER_REFERENCE:
2853
+ // Set the character reference code to zero (0).
2854
+ // Consume the next input character:
2855
+ if (cc === 0x78 || cc === 0x58) {
2856
+ // U+0078 LATIN SMALL LETTER X
2857
+ // U+0058 LATIN CAPITAL LETTER X
2858
+ // Append the current input character to the temporary
2859
+ // buffer. Switch to the hexadecimal character reference
2860
+ // start state.
2861
+ state = STATE_HEXADECIMAL_CHARACTER_REFERENCE_START;
2862
+ pos++;
2863
+ } else {
2864
+ // Anything else
2865
+ // Reconsume in the decimal character reference start state.
2866
+ state = STATE_DECIMAL_CHARACTER_REFERENCE_START;
2867
+ // Reconsume
2868
+ }
2869
+ break;
2870
+
2871
+ // https://html.spec.whatwg.org/multipage/parsing.html#hexadecimal-character-reference-start-state
2872
+ case STATE_HEXADECIMAL_CHARACTER_REFERENCE_START:
2873
+ // Consume the next input character:
2874
+ // ASCII hex digit: reconsume in the hexadecimal character reference state.
2875
+ // Anything else: absence-of-digits-in-numeric-character-reference parse
2876
+ // error. Flush code points consumed as a character reference. Reconsume
2877
+ // in the return state.
2878
+ if (isAsciiHexDigit(cc)) {
2879
+ state = STATE_HEXADECIMAL_CHARACTER_REFERENCE;
2880
+ } else {
2881
+ reportError(
2882
+ "absence-of-digits-in-numeric-character-reference",
2883
+ pos,
2884
+ pos + 1,
2885
+ "warning"
2886
+ );
2887
+ state = returnState;
2888
+ }
2889
+ // Reconsume
2890
+ break;
2891
+
2892
+ // https://html.spec.whatwg.org/multipage/parsing.html#decimal-character-reference-start-state
2893
+ case STATE_DECIMAL_CHARACTER_REFERENCE_START:
2894
+ // Consume the next input character:
2895
+ // ASCII digit: reconsume in the decimal character reference state.
2896
+ // Anything else: absence-of-digits-in-numeric-character-reference parse
2897
+ // error. Flush code points consumed as a character reference. Reconsume
2898
+ // in the return state.
2899
+ if (isAsciiDigit(cc)) {
2900
+ state = STATE_DECIMAL_CHARACTER_REFERENCE;
2901
+ } else {
2902
+ reportError(
2903
+ "absence-of-digits-in-numeric-character-reference",
2904
+ pos,
2905
+ pos + 1,
2906
+ "warning"
2907
+ );
2908
+ state = returnState;
2909
+ }
2910
+ // Reconsume
2911
+ break;
2912
+
2913
+ // https://html.spec.whatwg.org/multipage/parsing.html#hexadecimal-character-reference-state
2914
+ case STATE_HEXADECIMAL_CHARACTER_REFERENCE:
2915
+ // Consume the next input character:
2916
+ if (isAsciiHexDigit(cc)) {
2917
+ // ASCII digit / upper hex / lower hex
2918
+ // Multiply the character reference code by 16. Add a numeric
2919
+ // version of the current input character to the character
2920
+ // reference code.
2921
+ pos++;
2922
+ } else if (cc === CC_SEMICOLON) {
2923
+ // U+003B SEMICOLON
2924
+ // Switch to the numeric character reference end state.
2925
+ state = STATE_NUMERIC_CHARACTER_REFERENCE_END;
2926
+ pos++;
2927
+ } else {
2928
+ // Anything else
2929
+ // This is a missing-semicolon-after-character-reference
2930
+ // parse error. Reconsume in the numeric character reference
2931
+ // end state.
2932
+ reportError(
2933
+ "missing-semicolon-after-character-reference",
2934
+ pos,
2935
+ pos + 1,
2936
+ "warning"
2937
+ );
2938
+ state = STATE_NUMERIC_CHARACTER_REFERENCE_END;
2939
+ // Reconsume
2940
+ }
2941
+ break;
2942
+
2943
+ // https://html.spec.whatwg.org/multipage/parsing.html#decimal-character-reference-state
2944
+ case STATE_DECIMAL_CHARACTER_REFERENCE:
2945
+ // Consume the next input character:
2946
+ if (isAsciiDigit(cc)) {
2947
+ // ASCII digit
2948
+ // Multiply the character reference code by 10. Add a numeric
2949
+ // version of the current input character (subtract 0x0030
2950
+ // from the character's code point) to the character reference
2951
+ // code.
2952
+ pos++;
2953
+ } else if (cc === CC_SEMICOLON) {
2954
+ // U+003B SEMICOLON
2955
+ // Switch to the numeric character reference end state.
2956
+ state = STATE_NUMERIC_CHARACTER_REFERENCE_END;
2957
+ pos++;
2958
+ } else {
2959
+ // Anything else
2960
+ // This is a missing-semicolon-after-character-reference
2961
+ // parse error. Reconsume in the numeric character reference
2962
+ // end state.
2963
+ reportError(
2964
+ "missing-semicolon-after-character-reference",
2965
+ pos,
2966
+ pos + 1,
2967
+ "warning"
2968
+ );
2969
+ state = STATE_NUMERIC_CHARACTER_REFERENCE_END;
2970
+ // Reconsume
2971
+ }
2972
+ break;
2973
+
2974
+ // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
2975
+ case STATE_NUMERIC_CHARACTER_REFERENCE_END:
2976
+ // Check the character reference code (validation omitted for
2977
+ // the scanner — we don't decode, just skip past the entity).
2978
+ // Flush code points consumed as a character reference.
2979
+ // Switch to the return state.
2980
+ state = returnState;
2981
+ // Reconsume
2982
+ break;
2983
+
2984
+ /* istanbul ignore next -- @preserve: defensive fallback, all states are explicit above */
2985
+ default:
2986
+ pos++;
2987
+ }
2988
+ }
2989
+
2990
+ // Handle EOF in non-data states per the WHATWG spec.
2991
+ //
2992
+ // Each in-progress comment / doctype / cdata / tag emits its partial
2993
+ // token range plus a corresponding `eof-in-X` parse error. Severity is
2994
+ // `"error"` because the emitted token offset range is incomplete (missing
2995
+ // trailing `-->`, `>`, `]]>`, etc.). For data / `<` / `</` / `<!`-only
2996
+ // inputs we emit `eof-before-tag-name` and fall through to flush the
2997
+ // pending text span (which still contains the lone `<`).
2998
+ // If EOF caught us inside a character-reference state, flush whatever the
2999
+ // scanner had consumed and resume in the return state so any in-progress
3000
+ // tag/comment is handled correctly by the branches below.
3001
+ if (
3002
+ state >= STATE_CHARACTER_REFERENCE &&
3003
+ state <= STATE_NUMERIC_CHARACTER_REFERENCE_END
3004
+ ) {
3005
+ state = returnState;
3006
+ }
3007
+
3008
+ if (
3009
+ (state >= STATE_TAG_NAME && state <= STATE_SELF_CLOSING_START_TAG) ||
3010
+ state === STATE_RCDATA_END_TAG_NAME ||
3011
+ state === STATE_RAWTEXT_END_TAG_NAME ||
3012
+ state === STATE_SCRIPT_DATA_END_TAG_NAME ||
3013
+ state === STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME
3014
+ ) {
3015
+ // EOF mid-tag — emit the partial open/close tag at EOF so the
3016
+ // consumer still sees the tag. This is a deliberate deviation
3017
+ // from the spec's per-character emission model: rather than
3018
+ // dropping the in-progress tag, we emit its offset range up to EOF.
3019
+ reportError("eof-in-tag", len, len, "error");
3020
+ // If we hit EOF mid-attribute-name, the name runs to EOF. Set
3021
+ // attrNameEnd here so the emitted attribute range is valid.
3022
+ if (state === STATE_ATTRIBUTE_NAME && attrNameStart !== -1) {
3023
+ attrNameEnd = len;
3024
+ }
3025
+ if (attrNameStart !== -1) emitAttribute(len);
3026
+ // If we hit EOF before the tag-name end was recorded, the name runs
3027
+ // to EOF. `tagNameEnd` may carry over from a previously emitted tag,
3028
+ // so reset it whenever it's missing or stale (less than `tagNameStart`)
3029
+ // — covers `<div` open-tag EOFs as well as `<title>x</tit` and other
3030
+ // content-mode end-tag-name EOFs.
3031
+ if (tagNameStart !== -1 && tagNameEnd < tagNameStart) {
3032
+ tagNameEnd = len;
3033
+ }
3034
+ flushText(tagStart);
3035
+ pos =
3036
+ input.charCodeAt(tagStart + 1) === CC_SOLIDUS
3037
+ ? emitCloseTag(len)
3038
+ : emitOpenTag(len, false);
3039
+ } else if (
3040
+ (state >= STATE_COMMENT_START && state <= STATE_BOGUS_COMMENT) ||
3041
+ (state >= STATE_COMMENT_LESS_THAN_SIGN &&
3042
+ state <= STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH) ||
3043
+ state === STATE_MARKUP_DECLARATION_OPEN
3044
+ ) {
3045
+ // Bogus comments at EOF are normal per spec (no parse error).
3046
+ if (state !== STATE_BOGUS_COMMENT) {
3047
+ reportError("eof-in-comment", len, len, "error");
3048
+ }
3049
+ if (callbacks.comment !== undefined) {
3050
+ pos = callbacks.comment(input, commentStart, len);
3051
+ }
3052
+ } else if (state >= STATE_CDATA_SECTION && state <= STATE_CDATA_SECTION_END) {
3053
+ reportError("eof-in-cdata", len, len, "error");
3054
+ if (callbacks.comment !== undefined) {
3055
+ pos = callbacks.comment(input, commentStart, len);
3056
+ }
3057
+ } else if (state >= STATE_DOCTYPE && state <= STATE_BOGUS_DOCTYPE) {
3058
+ reportError("eof-in-doctype", len, len, "error");
3059
+ if (callbacks.doctype !== undefined) {
3060
+ pos = callbacks.doctype(input, commentStart, len);
3061
+ }
3062
+ } else {
3063
+ if (
3064
+ state === STATE_SCRIPT_DATA_ESCAPED ||
3065
+ state === STATE_SCRIPT_DATA_ESCAPED_DASH ||
3066
+ state === STATE_SCRIPT_DATA_ESCAPED_DASH_DASH ||
3067
+ state === STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN ||
3068
+ state === STATE_SCRIPT_DATA_DOUBLE_ESCAPED ||
3069
+ state === STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH ||
3070
+ state === STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH ||
3071
+ state === STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN ||
3072
+ state === STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END
3073
+ ) {
3074
+ // Inside `<script><!-- … ` at EOF — spec calls this an
3075
+ // eof-in-script-html-comment-like-text parse error. The
3076
+ // less-than-sign and double-escape-end states reconsume back
3077
+ // into the (double-)escaped state on EOF per spec, which then
3078
+ // hits this same error.
3079
+ reportError("eof-in-script-html-comment-like-text", len, len, "error");
3080
+ } else if (state === STATE_TAG_OPEN || state === STATE_END_TAG_OPEN) {
3081
+ // `<` or `</` with nothing after; spec calls this
3082
+ // eof-before-tag-name. The lone `<` / `</` is preserved in the
3083
+ // pending text span which is flushed below.
3084
+ reportError("eof-before-tag-name", len, len, "warning");
3085
+ }
3086
+ if (textStart < len && callbacks.text !== undefined) {
3087
+ callbacks.text(input, textStart, len);
3088
+ }
3089
+ }
3090
+
3091
+ return pos;
3092
+ };
3093
+
3094
// Publish the three QUOTE_* constants as properties of `walkHtmlTokens`
// (the value this module exports), so they travel with the export.
walkHtmlTokens.QUOTE_NONE = QUOTE_NONE;
walkHtmlTokens.QUOTE_SINGLE = QUOTE_SINGLE;
walkHtmlTokens.QUOTE_DOUBLE = QUOTE_DOUBLE;
3097
+
3098
// Windows-1252 substitution table used when a numeric character reference
// lands in the 0x80-0x9F range. Per the WHATWG
// numeric-character-reference-end state these code points decode to the
// matching Windows-1252 glyph (with a parse error) instead of the raw C1
// control character. 0x81, 0x8D, 0x8F, 0x90 and 0x9D have no entry.
// Values are written as `\u` escapes so the table does not depend on the
// encoding this file is stored or transmitted in.
const NUMERIC_C1_REMAP = {
	0x80: "\u20ac", // €
	0x82: "\u201a", // ‚
	0x83: "\u0192", // ƒ
	0x84: "\u201e", // „
	0x85: "\u2026", // …
	0x86: "\u2020", // †
	0x87: "\u2021", // ‡
	0x88: "\u02c6", // ˆ
	0x89: "\u2030", // ‰
	0x8a: "\u0160", // Š
	0x8b: "\u2039", // ‹
	0x8c: "\u0152", // Œ
	0x8e: "\u017d", // Ž
	0x91: "\u2018", // ‘
	0x92: "\u2019", // ’
	0x93: "\u201c", // “
	0x94: "\u201d", // ”
	0x95: "\u2022", // •
	0x96: "\u2013", // –
	0x97: "\u2014", // —
	0x98: "\u02dc", // ˜
	0x99: "\u2122", // ™
	0x9a: "\u0161", // š
	0x9b: "\u203a", // ›
	0x9c: "\u0153", // œ
	0x9e: "\u017e", // ž
	0x9f: "\u0178" // Ÿ
};
3131
+
3132
/**
 * Turn the numeric value of a character reference into the text it
 * decodes to, following the WHATWG numeric-character-reference-end rules.
 * @param {number} code numeric character reference code point
 * @returns {string} decoded character per WHATWG remap rules
 */
const decodeNumericReference = (code) => {
	// NULL, lone surrogates and values past the Unicode range all collapse
	// to U+FFFD REPLACEMENT CHARACTER.
	const isNull = code === 0;
	const isBeyondUnicode = code > 0x10ffff;
	const isSurrogate = code >= 0xd800 && code <= 0xdfff;
	if (isNull || isBeyondUnicode || isSurrogate) {
		return "\ufffd";
	}

	// 0x80-0x9F: use the Windows-1252 substitution when the table has one;
	// the few unmapped code points fall through and decode as themselves.
	const isC1Control = code >= 0x80 && code <= 0x9f;
	if (isC1Control) {
		const table = /** @type {Record<number, string>} */ (NUMERIC_C1_REMAP);
		const substitute = table[code];
		if (substitute !== undefined) return substitute;
	}

	// Everything else (noncharacters and C0 controls included) decodes to
	// the code point itself; the spec's parse-error classes are not
	// reported from here.
	return String.fromCodePoint(code);
};
3154
+
3155
/**
 * Decode HTML character references in a string. Handles all numeric
 * references (with WHATWG remap of 0x00, surrogates, out-of-range, and the
 * C1 Windows-1252 table) and the named character references found in
 * `HTML_ENTITIES`. Unknown or malformed references are left as literal text.
 *
 * When `isAttribute` is `true`, applies the WHATWG
 * "consumed-as-part-of-an-attribute" rule: a named reference without a
 * trailing `;` whose next character is `=` or ASCII alphanumeric is left
 * undecoded, so e.g. `&amp=foo` stays literal in an attribute value but
 * decodes to `&=foo` in text.
 *
 * Only string-valued table entries count as entities. A bare property
 * lookup would also see members inherited from `Object.prototype` if the
 * table is a plain object, so `&constructor;` or `&toString` could be
 * replaced by a stringified function instead of being left untouched.
 * @param {string} str the raw string from the token slice
 * @param {boolean=} isAttribute true if `str` came from an attribute value
 * @returns {string} decoded string
 */
walkHtmlTokens.decodeHtmlEntities = (str, isAttribute) => {
	// Fast path: nothing to decode.
	if (!str.includes("&")) return str;

	// Match one of three forms (each with an optional trailing `;`):
	//   `&#x<hex>` - hex numeric reference (requires the `x`/`X`).
	//   `&#<dec>`  - decimal numeric reference (digits only).
	//   `&<name>`  - named reference (letter followed by alphanumerics).
	// The alternatives are kept separate so a decimal reference such as
	// `&#65b` does not swallow the trailing `b` as if it were hex.
	return str.replace(
		/&(?:#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z][a-zA-Z0-9]*);?/g,
		(match, offset, source) => {
			// Numeric reference: `&#65;` or `&#x41;`.
			if (match.charCodeAt(1) === 0x23 /* # */) {
				const marker = match.charCodeAt(2);
				const isHex = marker === 0x78 /* x */ || marker === 0x58; /* X */
				const digitsStart = isHex ? 3 : 2;
				const digitsEnd = match.endsWith(";")
					? match.length - 1
					: match.length;
				// The regex guarantees at least one digit, so `parseInt`
				// never yields NaN here (overlong input becomes Infinity,
				// which decodes to U+FFFD).
				return decodeNumericReference(
					Number.parseInt(
						match.slice(digitsStart, digitsEnd),
						isHex ? 16 : 10
					)
				);
			}

			// Named reference; `name` keeps the trailing `;` when present
			// because the table is keyed with it.
			const name = match.slice(1);
			const matchEndsWithSemi = name.endsWith(";");

			// Attribute rule, `=` case: the greedy regex already absorbed
			// every trailing alphanumeric, so `=` is the only "next
			// character" left to check against the source.
			if (
				isAttribute &&
				!matchEndsWithSemi &&
				source.charCodeAt(offset + match.length) === 0x3d /* = */
			) {
				return match;
			}

			// Longest-prefix search, capped at MAX_ENTITY_NAME_LEN so that
			// `&` followed by thousands of alphanumerics stays linear-time.
			// Anything past the matched prefix is appended verbatim.
			const searchLen = Math.min(name.length, MAX_ENTITY_NAME_LEN);
			for (let i = searchLen; i > 0; i--) {
				const prefix = name.slice(0, i);
				const replacement = HTML_ENTITIES[prefix];
				// Ignore misses and any non-string inherited members.
				if (typeof replacement !== "string") continue;

				// Attribute rule, alphanumeric case: a `;`-less prefix
				// followed by leftover characters stays literal.
				if (isAttribute && i < name.length && !prefix.endsWith(";")) {
					return match;
				}
				return replacement + name.slice(i);
			}
			return match;
		}
	);
};
3248
+
3249
+ module.exports = walkHtmlTokens;