webpack 5.106.2 → 5.107.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. package/README.md +2 -2
  2. package/lib/APIPlugin.js +1 -1
  3. package/lib/Cache.js +3 -6
  4. package/lib/CompatibilityPlugin.js +8 -7
  5. package/lib/Compilation.js +34 -26
  6. package/lib/Compiler.js +4 -13
  7. package/lib/ContextModule.js +2 -2
  8. package/lib/DefinePlugin.js +2 -2
  9. package/lib/Dependency.js +22 -1
  10. package/lib/DependencyTemplate.js +2 -1
  11. package/lib/EnvironmentPlugin.js +1 -1
  12. package/lib/EvalSourceMapDevToolPlugin.js +8 -9
  13. package/lib/ExternalModule.js +76 -15
  14. package/lib/ExternalModuleFactoryPlugin.js +5 -0
  15. package/lib/FileSystemInfo.js +187 -72
  16. package/lib/Generator.js +3 -3
  17. package/lib/HotModuleReplacementPlugin.js +26 -8
  18. package/lib/IgnorePlugin.js +2 -1
  19. package/lib/Module.js +19 -18
  20. package/lib/ModuleFactory.js +1 -1
  21. package/lib/ModuleSourceTypeConstants.js +31 -1
  22. package/lib/ModuleTypeConstants.js +12 -3
  23. package/lib/MultiCompiler.js +2 -2
  24. package/lib/NodeStuffPlugin.js +1 -1
  25. package/lib/NormalModule.js +13 -31
  26. package/lib/NormalModuleFactory.js +10 -2
  27. package/lib/Parser.js +1 -1
  28. package/lib/ProgressPlugin.js +129 -56
  29. package/lib/RuntimeGlobals.js +5 -5
  30. package/lib/RuntimeModule.js +9 -7
  31. package/lib/RuntimePlugin.js +11 -0
  32. package/lib/WarnCaseSensitiveModulesPlugin.js +70 -2
  33. package/lib/WarnDeprecatedOptionPlugin.js +1 -1
  34. package/lib/WarnNoModeSetPlugin.js +16 -1
  35. package/lib/Watching.js +2 -3
  36. package/lib/WebpackError.js +3 -77
  37. package/lib/WebpackIsIncludedPlugin.js +1 -1
  38. package/lib/WebpackOptionsApply.js +13 -1
  39. package/lib/asset/AssetBytesGenerator.js +6 -2
  40. package/lib/asset/AssetGenerator.js +22 -8
  41. package/lib/asset/AssetModulesPlugin.js +3 -1
  42. package/lib/asset/AssetSourceGenerator.js +6 -2
  43. package/lib/buildChunkGraph.js +4 -6
  44. package/lib/cache/PackFileCacheStrategy.js +4 -4
  45. package/lib/cli.js +3 -1
  46. package/lib/config/defaults.js +197 -10
  47. package/lib/config/normalization.js +3 -1
  48. package/lib/css/CssGenerator.js +320 -105
  49. package/lib/css/CssInjectStyleRuntimeModule.js +44 -42
  50. package/lib/css/CssLoadingRuntimeModule.js +22 -4
  51. package/lib/{CssModule.js → css/CssModule.js} +15 -15
  52. package/lib/css/CssModulesPlugin.js +166 -86
  53. package/lib/css/CssParser.js +566 -269
  54. package/lib/css/walkCssTokens.js +148 -2
  55. package/lib/dependencies/AMDRequireDependenciesBlockParserPlugin.js +1 -1
  56. package/lib/dependencies/CommonJsDependencyHelpers.js +63 -0
  57. package/lib/dependencies/CommonJsExportRequireDependency.js +54 -10
  58. package/lib/dependencies/CommonJsExportsParserPlugin.js +1 -1
  59. package/lib/dependencies/CommonJsFullRequireDependency.js +32 -9
  60. package/lib/dependencies/CommonJsImportsParserPlugin.js +4 -3
  61. package/lib/dependencies/CommonJsRequireDependency.js +67 -4
  62. package/lib/dependencies/ContextDependency.js +1 -1
  63. package/lib/dependencies/ContextDependencyHelpers.js +1 -1
  64. package/lib/dependencies/CreateRequireParserPlugin.js +1 -1
  65. package/lib/dependencies/CriticalDependencyWarning.js +1 -1
  66. package/lib/dependencies/CssIcssExportDependency.js +332 -67
  67. package/lib/dependencies/CssIcssImportDependency.js +49 -7
  68. package/lib/dependencies/CssIcssSymbolDependency.js +11 -3
  69. package/lib/dependencies/CssImportDependency.js +8 -0
  70. package/lib/dependencies/CssUrlDependency.js +25 -0
  71. package/lib/dependencies/HarmonyDetectionParserPlugin.js +1 -1
  72. package/lib/dependencies/HarmonyExportDependencyParserPlugin.js +8 -7
  73. package/lib/dependencies/HarmonyExportExpressionDependency.js +22 -14
  74. package/lib/dependencies/HarmonyExportImportedSpecifierDependency.js +110 -3
  75. package/lib/dependencies/HarmonyImportDependency.js +10 -2
  76. package/lib/dependencies/HarmonyImportDependencyParserPlugin.js +22 -1
  77. package/lib/dependencies/HarmonyImportSpecifierDependency.js +1 -1
  78. package/lib/{HarmonyLinkingError.js → dependencies/HarmonyLinkingError.js} +5 -3
  79. package/lib/dependencies/HtmlInlineScriptDependency.js +133 -0
  80. package/lib/dependencies/HtmlInlineStyleDependency.js +101 -0
  81. package/lib/dependencies/HtmlScriptSrcDependency.js +318 -0
  82. package/lib/dependencies/HtmlSourceDependency.js +127 -0
  83. package/lib/dependencies/ImportMetaContextDependencyParserPlugin.js +1 -1
  84. package/lib/dependencies/ImportParserPlugin.js +2 -2
  85. package/lib/dependencies/ImportPhase.js +1 -1
  86. package/lib/dependencies/RequireIncludeDependencyParserPlugin.js +1 -1
  87. package/lib/{RequireJsStuffPlugin.js → dependencies/RequireJsStuffPlugin.js} +7 -7
  88. package/lib/dependencies/SystemPlugin.js +1 -1
  89. package/lib/dependencies/WebAssemblyImportDependency.js +1 -1
  90. package/lib/dependencies/WorkerPlugin.js +2 -2
  91. package/lib/{DelegatedModule.js → dll/DelegatedModule.js} +31 -31
  92. package/lib/{DelegatedModuleFactoryPlugin.js → dll/DelegatedModuleFactoryPlugin.js} +4 -4
  93. package/lib/{DelegatedPlugin.js → dll/DelegatedPlugin.js} +2 -2
  94. package/lib/{DllEntryPlugin.js → dll/DllEntryPlugin.js} +4 -4
  95. package/lib/{DllModule.js → dll/DllModule.js} +24 -24
  96. package/lib/{DllModuleFactory.js → dll/DllModuleFactory.js} +4 -4
  97. package/lib/{DllPlugin.js → dll/DllPlugin.js} +6 -5
  98. package/lib/{DllReferencePlugin.js → dll/DllReferencePlugin.js} +14 -14
  99. package/lib/{LibManifestPlugin.js → dll/LibManifestPlugin.js} +9 -9
  100. package/lib/{AsyncDependencyToInitialChunkError.js → errors/AsyncDependencyToInitialChunkError.js} +2 -2
  101. package/lib/errors/BuildCycleError.js +1 -1
  102. package/lib/{ChunkRenderError.js → errors/ChunkRenderError.js} +1 -1
  103. package/lib/{CodeGenerationError.js → errors/CodeGenerationError.js} +1 -1
  104. package/lib/{CommentCompilationWarning.js → errors/CommentCompilationWarning.js} +3 -3
  105. package/lib/{ConcurrentCompilationError.js → errors/ConcurrentCompilationError.js} +4 -2
  106. package/lib/{EnvironmentNotSupportAsyncWarning.js → errors/EnvironmentNotSupportAsyncWarning.js} +4 -4
  107. package/lib/{HookWebpackError.js → errors/HookWebpackError.js} +5 -5
  108. package/lib/{IgnoreErrorModuleFactory.js → errors/IgnoreErrorModuleFactory.js} +4 -4
  109. package/lib/{InvalidDependenciesModuleWarning.js → errors/InvalidDependenciesModuleWarning.js} +3 -3
  110. package/lib/errors/JSONParseError.js +114 -0
  111. package/lib/{ModuleBuildError.js → errors/ModuleBuildError.js} +5 -5
  112. package/lib/{ModuleDependencyError.js → errors/ModuleDependencyError.js} +2 -2
  113. package/lib/{ModuleDependencyWarning.js → errors/ModuleDependencyWarning.js} +4 -4
  114. package/lib/{ModuleError.js → errors/ModuleError.js} +5 -5
  115. package/lib/{ModuleHashingError.js → errors/ModuleHashingError.js} +1 -1
  116. package/lib/{ModuleNotFoundError.js → errors/ModuleNotFoundError.js} +2 -2
  117. package/lib/{ModuleParseError.js → errors/ModuleParseError.js} +8 -6
  118. package/lib/{ModuleRestoreError.js → errors/ModuleRestoreError.js} +1 -1
  119. package/lib/{ModuleStoreError.js → errors/ModuleStoreError.js} +1 -1
  120. package/lib/{ModuleWarning.js → errors/ModuleWarning.js} +5 -5
  121. package/lib/{NodeStuffInWebError.js → errors/NodeStuffInWebError.js} +4 -4
  122. package/lib/errors/NonErrorEmittedError.js +28 -0
  123. package/lib/{UnhandledSchemeError.js → errors/UnhandledSchemeError.js} +2 -2
  124. package/lib/{UnsupportedFeatureWarning.js → errors/UnsupportedFeatureWarning.js} +3 -3
  125. package/lib/errors/WebpackError.js +84 -0
  126. package/lib/html/HtmlGenerator.js +379 -0
  127. package/lib/html/HtmlModulesPlugin.js +433 -0
  128. package/lib/html/HtmlParser.js +1489 -0
  129. package/lib/html/walkHtmlTokens.js +2733 -0
  130. package/lib/ids/IdHelpers.js +2 -1
  131. package/lib/index.js +34 -15
  132. package/lib/javascript/JavascriptModulesPlugin.js +89 -8
  133. package/lib/javascript/JavascriptParser.js +197 -16
  134. package/lib/javascript/JavascriptParserHelpers.js +1 -1
  135. package/lib/json/JsonParser.js +7 -16
  136. package/lib/library/AbstractLibraryPlugin.js +1 -1
  137. package/lib/library/EnableLibraryPlugin.js +1 -1
  138. package/lib/{FalseIIFEUmdWarning.js → library/FalseIIFEUmdWarning.js} +1 -1
  139. package/lib/library/ModuleLibraryPlugin.js +74 -0
  140. package/lib/node/NodeEnvironmentPlugin.js +4 -2
  141. package/lib/node/nodeConsole.js +113 -64
  142. package/lib/optimize/ConcatenatedModule.js +51 -6
  143. package/lib/optimize/InnerGraph.js +1 -1
  144. package/lib/optimize/InnerGraphPlugin.js +11 -1
  145. package/lib/optimize/MinMaxSizeWarning.js +4 -4
  146. package/lib/optimize/ModuleConcatenationPlugin.js +15 -7
  147. package/lib/optimize/RealContentHashPlugin.js +89 -26
  148. package/lib/optimize/SideEffectsFlagPlugin.js +111 -3
  149. package/lib/optimize/SplitChunksPlugin.js +1 -1
  150. package/lib/performance/AssetsOverSizeLimitWarning.js +2 -2
  151. package/lib/performance/EntrypointsOverSizeLimitWarning.js +2 -2
  152. package/lib/performance/NoAsyncChunksWarning.js +5 -3
  153. package/lib/performance/SizeLimitsPlugin.js +1 -1
  154. package/lib/prefetch/ChunkPrefetchTriggerRuntimeModule.js +4 -1
  155. package/lib/rules/UseEffectRulePlugin.js +4 -3
  156. package/lib/runtime/MakeDeferredNamespaceObjectRuntime.js +119 -13
  157. package/lib/runtime/SetAnonymousDefaultNameRuntimeModule.js +35 -0
  158. package/lib/schemes/DataUriPlugin.js +13 -1
  159. package/lib/schemes/VirtualUrlPlugin.js +1 -1
  160. package/lib/serialization/SerializerMiddleware.js +2 -2
  161. package/lib/sharing/ConsumeSharedPlugin.js +2 -2
  162. package/lib/sharing/ConsumeSharedRuntimeModule.js +8 -4
  163. package/lib/sharing/ProvideSharedModule.js +1 -1
  164. package/lib/sharing/ProvideSharedPlugin.js +1 -1
  165. package/lib/sharing/resolveMatchedConfigs.js +1 -1
  166. package/lib/stats/DefaultStatsFactoryPlugin.js +2 -2
  167. package/lib/stats/DefaultStatsPresetPlugin.js +1 -1
  168. package/lib/stats/DefaultStatsPrinterPlugin.js +1 -1
  169. package/lib/stats/StatsFactory.js +1 -1
  170. package/lib/typescript/TypeScriptPlugin.js +210 -0
  171. package/lib/url/URLParserPlugin.js +2 -2
  172. package/lib/util/AsyncQueue.js +2 -2
  173. package/lib/util/Hash.js +2 -2
  174. package/lib/util/LocConverter.js +53 -0
  175. package/lib/util/SortableSet.js +1 -1
  176. package/lib/util/cleverMerge.js +2 -2
  177. package/lib/util/comparators.js +3 -3
  178. package/lib/util/concatenate.js +3 -3
  179. package/lib/util/conventions.js +42 -1
  180. package/lib/util/createMappings.js +118 -0
  181. package/lib/{formatLocation.js → util/formatLocation.js} +2 -2
  182. package/lib/{SizeFormatHelpers.js → util/formatSize.js} +3 -1
  183. package/lib/util/fs.js +8 -8
  184. package/lib/util/hash/md4.js +1 -1
  185. package/lib/util/hash/xxhash64.js +1 -1
  186. package/lib/util/identifier.js +48 -0
  187. package/lib/util/internalSerializables.js +35 -19
  188. package/lib/util/magicComment.js +10 -7
  189. package/lib/util/parseJson.js +2 -73
  190. package/lib/util/source.js +21 -0
  191. package/lib/util/topologicalSort.js +69 -0
  192. package/lib/wasm-async/AsyncWebAssemblyModulesPlugin.js +2 -2
  193. package/lib/wasm-async/AsyncWebAssemblyParser.js +1 -1
  194. package/lib/wasm-sync/UnsupportedWebAssemblyFeatureError.js +5 -3
  195. package/lib/wasm-sync/WasmFinalizeExportsPlugin.js +1 -1
  196. package/lib/wasm-sync/WebAssemblyInInitialChunkError.js +5 -3
  197. package/lib/webpack.js +3 -1
  198. package/package.json +22 -20
  199. package/schemas/WebpackOptions.check.js +1 -1
  200. package/schemas/WebpackOptions.json +118 -3
  201. package/schemas/plugins/{DllPlugin.check.d.ts → HtmlGeneratorOptions.check.d.ts} +1 -1
  202. package/schemas/plugins/HtmlGeneratorOptions.check.js +6 -0
  203. package/schemas/plugins/HtmlGeneratorOptions.json +3 -0
  204. package/schemas/plugins/ProgressPlugin.check.js +1 -1
  205. package/schemas/plugins/ProgressPlugin.json +22 -0
  206. package/schemas/plugins/{DllReferencePlugin.check.d.ts → css/CssAutoOrModuleParserOptions.check.d.ts} +1 -1
  207. package/schemas/plugins/css/CssAutoOrModuleParserOptions.check.js +6 -0
  208. package/schemas/plugins/css/CssAutoOrModuleParserOptions.json +3 -0
  209. package/schemas/plugins/dll/DllPlugin.check.d.ts +7 -0
  210. package/schemas/plugins/dll/DllReferencePlugin.check.d.ts +7 -0
  211. package/types.d.ts +810 -101
  212. package/lib/CaseSensitiveModulesWarning.js +0 -80
  213. package/lib/GraphHelpers.js +0 -49
  214. package/lib/NoModeWarning.js +0 -23
  215. package/lib/css/CssMergeStyleSheetsRuntimeModule.js +0 -57
  216. /package/lib/{AbstractMethodError.js → errors/AbstractMethodError.js} +0 -0
  217. /package/schemas/plugins/{DllPlugin.check.js → dll/DllPlugin.check.js} +0 -0
  218. /package/schemas/plugins/{DllPlugin.json → dll/DllPlugin.json} +0 -0
  219. /package/schemas/plugins/{DllReferencePlugin.check.js → dll/DllReferencePlugin.check.js} +0 -0
  220. /package/schemas/plugins/{DllReferencePlugin.json → dll/DllReferencePlugin.json} +0 -0
@@ -0,0 +1,2733 @@
1
+ /*
2
+ MIT License http://www.opensource.org/licenses/mit-license.php
3
+ Author Raj Aryan (based on SWC parser by Alexander Akait)
4
+ */
5
+
6
+ "use strict";
7
+
8
+ // cspell:ignore apos
9
+
10
// Numeric identifiers for the tokenizer states. `walkHtmlTokens` keeps the current one in
// its `state` variable (initialised to STATE_DATA) and dispatches on it with a `switch`.
// The names follow the state names of the WHATWG HTML tokenizer (see the spec links on the
// cases of that switch and on the character reference states below).
// NOTE(review): per the TODO inside walkHtmlTokens, not every state declared here is
// handled by the switch yet.
// Data, tag and attribute states.
+ const STATE_DATA = 0;
11
+ const STATE_TAG_OPEN = 1;
12
+ const STATE_END_TAG_OPEN = 2;
13
+ const STATE_TAG_NAME = 3;
14
+ const STATE_BEFORE_ATTRIBUTE_NAME = 4;
15
+ const STATE_ATTRIBUTE_NAME = 5;
16
+ const STATE_AFTER_ATTRIBUTE_NAME = 6;
17
+ const STATE_BEFORE_ATTRIBUTE_VALUE = 7;
18
+ const STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED = 8;
19
+ const STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED = 9;
20
+ const STATE_ATTRIBUTE_VALUE_UNQUOTED = 10;
21
+ const STATE_AFTER_ATTRIBUTE_VALUE_QUOTED = 11;
22
+ const STATE_SELF_CLOSING_START_TAG = 12;
23
+
24
// Markup declaration (`<!`) and comment states.
+ const STATE_MARKUP_DECLARATION_OPEN = 13;
25
+ const STATE_COMMENT_START = 14;
26
+ const STATE_COMMENT_START_DASH = 15;
27
+ const STATE_COMMENT = 16;
28
+ const STATE_COMMENT_END_DASH = 17;
29
+ const STATE_COMMENT_END = 18;
30
+ const STATE_COMMENT_END_BANG = 19;
31
+ const STATE_BOGUS_COMMENT = 20;
32
+
33
+ const STATE_COMMENT_LESS_THAN_SIGN = 21;
34
+ const STATE_COMMENT_LESS_THAN_SIGN_BANG = 22;
35
+ const STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH = 23;
36
+ const STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH = 24;
37
+
38
// DOCTYPE states.
+ const STATE_DOCTYPE = 25;
39
+ const STATE_BEFORE_DOCTYPE_NAME = 26;
40
+ const STATE_DOCTYPE_NAME = 27;
41
+ const STATE_AFTER_DOCTYPE_NAME = 28;
42
+ const STATE_AFTER_DOCTYPE_PUBLIC_KEYWORD = 29;
43
+ const STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 30;
44
+ const STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 31;
45
+ const STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 32;
46
+ const STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 33;
47
+ const STATE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 34;
48
+ const STATE_AFTER_DOCTYPE_SYSTEM_KEYWORD = 35;
49
+ const STATE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 36;
50
+ const STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 37;
51
+ const STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 38;
52
+ const STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 39;
53
+ const STATE_BOGUS_DOCTYPE = 40;
54
+
55
// CDATA section states.
+ const STATE_CDATA_SECTION = 41;
56
+ const STATE_CDATA_SECTION_BRACKET = 42;
57
+ const STATE_CDATA_SECTION_END = 43;
58
+
59
// RCDATA states (getContentModeForTag selects STATE_RCDATA for <textarea> and <title>).
+ const STATE_RCDATA = 44;
60
+ const STATE_RCDATA_LESS_THAN_SIGN = 45;
61
+ const STATE_RCDATA_END_TAG_OPEN = 46;
62
+ const STATE_RCDATA_END_TAG_NAME = 47;
63
+
64
// RAWTEXT states (selected for <style>, <xmp>, <iframe>, <noembed>, <noframes>).
+ const STATE_RAWTEXT = 48;
65
+ const STATE_RAWTEXT_LESS_THAN_SIGN = 49;
66
+ const STATE_RAWTEXT_END_TAG_OPEN = 50;
67
+ const STATE_RAWTEXT_END_TAG_NAME = 51;
68
+
69
// Script data states (selected for <script>).
+ const STATE_SCRIPT_DATA = 52;
70
+ const STATE_SCRIPT_DATA_LESS_THAN_SIGN = 53;
71
+ const STATE_SCRIPT_DATA_END_TAG_OPEN = 54;
72
+ const STATE_SCRIPT_DATA_END_TAG_NAME = 55;
73
+ const STATE_SCRIPT_DATA_ESCAPE_START = 56;
74
+ const STATE_SCRIPT_DATA_ESCAPE_START_DASH = 57;
75
+ const STATE_SCRIPT_DATA_ESCAPED = 58;
76
+ const STATE_SCRIPT_DATA_ESCAPED_DASH = 59;
77
+ const STATE_SCRIPT_DATA_ESCAPED_DASH_DASH = 60;
78
+ const STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 61;
79
+ const STATE_SCRIPT_DATA_ESCAPED_END_TAG_OPEN = 62;
80
+ const STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME = 63;
81
+ const STATE_SCRIPT_DATA_DOUBLE_ESCAPE_START = 64;
82
+ const STATE_SCRIPT_DATA_DOUBLE_ESCAPED = 65;
83
+ const STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 66;
84
+ const STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 67;
85
+ const STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 68;
86
+ const STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END = 69;
87
+
88
// PLAINTEXT state (selected for <plaintext>).
+ const STATE_PLAINTEXT = 70;
89
+
90
// Character reference states; the data state switches to STATE_CHARACTER_REFERENCE on `&`
// after recording the state to come back to in `returnState`.
+ // https://html.spec.whatwg.org/multipage/parsing.html#character-reference-state
91
+ const STATE_CHARACTER_REFERENCE = 71;
92
+ // https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
93
+ const STATE_NAMED_CHARACTER_REFERENCE = 72;
94
+ // https://html.spec.whatwg.org/multipage/parsing.html#ambiguous-ampersand-state
95
+ const STATE_AMBIGUOUS_AMPERSAND = 73;
96
+ // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-state
97
+ const STATE_NUMERIC_CHARACTER_REFERENCE = 74;
98
+ // https://html.spec.whatwg.org/multipage/parsing.html#hexadecimal-character-reference-start-state
99
+ const STATE_HEXADECIMAL_CHARACTER_REFERENCE_START = 75;
100
+ // https://html.spec.whatwg.org/multipage/parsing.html#decimal-character-reference-start-state
101
+ const STATE_DECIMAL_CHARACTER_REFERENCE_START = 76;
102
+ // https://html.spec.whatwg.org/multipage/parsing.html#hexadecimal-character-reference-state
103
+ const STATE_HEXADECIMAL_CHARACTER_REFERENCE = 77;
104
+ // https://html.spec.whatwg.org/multipage/parsing.html#decimal-character-reference-state
105
+ const STATE_DECIMAL_CHARACTER_REFERENCE = 78;
106
+ // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
107
+ const STATE_NUMERIC_CHARACTER_REFERENCE_END = 79;
108
+
109
// Character codes the tokenizer compares against `input.charCodeAt(pos)`.
// Each value is the code of the ASCII character named by the constant
// (e.g. 0x3c is "<", 0x3e is ">", 0x26 is "&").
+ const CC_TAB = 0x09;
110
+ const CC_LF = 0x0a;
111
+ const CC_FF = 0x0c;
112
+ const CC_SPACE = 0x20;
113
+ const CC_EXCLAMATION_MARK = 0x21;
114
+ const CC_QUOTATION_MARK = 0x22;
115
+ const CC_NUMBER_SIGN = 0x23;
116
+ const CC_AMPERSAND = 0x26;
117
+ const CC_APOSTROPHE = 0x27;
118
+ const CC_HYPHEN_MINUS = 0x2d;
119
+ const CC_SOLIDUS = 0x2f;
120
+ const CC_SEMICOLON = 0x3b;
121
+ const CC_LESS_THAN = 0x3c;
122
+ const CC_EQUALS = 0x3d;
123
+ const CC_GREATER_THAN = 0x3e;
124
+ const CC_QUESTION_MARK = 0x3f;
125
+ const CC_LEFT_SQUARE_BRACKET = 0x5b;
126
+ const CC_RIGHT_SQUARE_BRACKET = 0x5d;
127
+
128
// Quoting style of an attribute value. The before-attribute-value state stores one of these
// in `attrQuoteType`, and `emitAttribute` passes it to `callbacks.attribute` as `quoteType`.
// QUOTE_NONE is also the reset value after an attribute has been emitted.
+ const QUOTE_DOUBLE = 1;
129
+ const QUOTE_SINGLE = 2;
130
+ const QUOTE_NONE = 0;
131
+
132
/**
 * Tests whether a character code is an ASCII letter (`A`-`Z` or `a`-`z`).
 * @param {number} cc character code
 * @returns {boolean} true when `cc` is an ASCII upper- or lower-case letter
 */
const isAsciiAlpha = (cc) => {
	// Upper-case range first, same order as the original expression.
	if (cc >= 0x41 && cc <= 0x5a) {
		return true;
	}
	return cc >= 0x61 && cc <= 0x7a;
};
138
+
139
/**
 * Tests whether a character code is an ASCII letter or an ASCII digit.
 * @param {number} cc character code
 * @returns {boolean} true when `cc` is in `A`-`Z`, `a`-`z` or `0`-`9`
 */
const isAsciiAlphanumeric = (cc) => {
	if (isAsciiAlpha(cc)) {
		return true;
	}
	// Not a letter: accept only the digit range 0x30-0x39.
	return cc >= 0x30 && cc <= 0x39;
};
145
+
146
/**
 * Tests whether a character code is an ASCII decimal digit (`0`-`9`).
 * @param {number} cc character code
 * @returns {boolean} true when `cc` is between 0x30 and 0x39 inclusive
 */
const isAsciiDigit = (cc) => {
	if (cc < 0x30) {
		return false;
	}
	return cc <= 0x39;
};
151
+
152
/**
 * Tests whether a character code is an ASCII hexadecimal digit
 * (`0`-`9`, `A`-`F` or `a`-`f`).
 * @param {number} cc character code
 * @returns {boolean} true when `cc` is a hexadecimal digit
 */
const isAsciiHexDigit = (cc) => {
	const isDecimalDigit = cc >= 0x30 && cc <= 0x39;
	const isUpperHexLetter = cc >= 0x41 && cc <= 0x46;
	const isLowerHexLetter = cc >= 0x61 && cc <= 0x66;
	return isDecimalDigit || isUpperHexLetter || isLowerHexLetter;
};
160
+
161
/**
 * Tests whether a character code is whitespace for the purposes of the tokenizer:
 * tab, line feed, form feed, carriage return or space.
 *
 * The WHATWG tokenizer states only list tab, LF, FF and space because the specification
 * normalizes newlines (CRLF and lone CR become LF) before tokenization. Nothing in this
 * file performs that normalization, so U+000D is accepted here as well; otherwise CRLF
 * input such as `<div\r\n class="a">` would yield the tag name `div\r`. When the input
 * has already been normalized this extra check never matches.
 * NOTE(review): confirm no caller relies on CR being kept inside names/unquoted values.
 * @param {number} cc character code
 * @returns {boolean} is space
 */
const isSpace = (cc) =>
	cc === CC_TAB ||
	cc === CC_LF ||
	cc === CC_FF ||
	cc === 0x0d || // U+000D CARRIAGE RETURN
	cc === CC_SPACE;
167
+
168
+ /**
169
+ * @typedef {object} HtmlTokenCallbacks
170
+ * @property {(input: string, start: number, end: number, nameStart: number, nameEnd: number, selfClosing: boolean) => number=} openTag
171
+ * @property {(input: string, start: number, end: number, nameStart: number, nameEnd: number) => number=} closeTag
172
+ * @property {(input: string, start: number, end: number) => number=} text
173
+ * @property {(input: string, nameStart: number, nameEnd: number, valueStart: number, valueEnd: number, quoteType: number) => number=} attribute
174
+ * @property {(input: string, start: number, end: number) => number=} comment
175
+ * @property {(input: string, start: number, end: number) => number=} doctype
176
+ */
177
+
178
+ /**
179
+ * @param {string} input input string
180
+ * @param {number} pos current position
181
+ * @param {HtmlTokenCallbacks} callbacks callbacks
182
+ * @returns {number} final position
183
+ */
184
+ const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
185
+ const len = input.length;
186
+ let state = STATE_DATA;
187
+ let returnState = STATE_DATA;
188
+
189
+ let textStart = pos;
190
+ let tagStart = pos;
191
+ let tagNameStart = -1;
192
+ let tagNameEnd = -1;
193
+ let attrNameStart = -1;
194
+ let attrNameEnd = -1;
195
+ let attrValueStart = -1;
196
+ let attrQuoteType = QUOTE_NONE;
197
+ let commentStart = pos;
198
+ let lastOpenTagName = "";
199
+ let tempBuffer = "";
200
+ let namedEntityConsumed = 0;
201
+
202
+ /**
203
+ * @param {number} cc character code
204
+ * @returns {boolean} is ascii lower alpha
205
+ */
206
+ const isAsciiLowerAlpha = (cc) => cc >= 0x61 && cc <= 0x7a;
207
+
208
+ /**
209
+ * @param {number} cc character code
210
+ * @returns {boolean} is ascii upper alpha
211
+ */
212
+ const isAsciiUpperAlpha = (cc) => cc >= 0x41 && cc <= 0x5a;
213
+
214
+ /**
215
+ * @param {string} name tag name (lowercase)
216
+ * @returns {number} content mode state for this tag, or STATE_DATA
217
+ */
218
+ const getContentModeForTag = (name) => {
219
+ switch (name) {
220
+ case "textarea":
221
+ case "title":
222
+ return STATE_RCDATA;
223
+ case "style":
224
+ case "xmp":
225
+ case "iframe":
226
+ case "noembed":
227
+ case "noframes":
228
+ return STATE_RAWTEXT;
229
+ case "script":
230
+ return STATE_SCRIPT_DATA;
231
+ case "plaintext":
232
+ return STATE_PLAINTEXT;
233
+ default:
234
+ return STATE_DATA;
235
+ }
236
+ };
237
+
238
+ /**
239
+ * @param {number} endPos end position
240
+ */
241
+ const flushText = (endPos) => {
242
+ if (textStart < endPos && callbacks.text !== undefined) {
243
+ callbacks.text(input, textStart, endPos);
244
+ }
245
+ };
246
+
247
+ /**
248
+ * @param {number} endPos end position
249
+ * @returns {number} next position
250
+ */
251
+ const emitAttribute = (endPos) => {
252
+ let nextPos = endPos;
253
+ if (callbacks.attribute !== undefined && attrNameStart !== -1) {
254
+ nextPos = callbacks.attribute(
255
+ input,
256
+ attrNameStart,
257
+ attrNameEnd,
258
+ attrValueStart,
259
+ attrValueStart === -1 ? -1 : endPos,
260
+ attrQuoteType
261
+ );
262
+ }
263
+ attrNameStart = -1;
264
+ attrValueStart = -1;
265
+ attrQuoteType = QUOTE_NONE;
266
+ return nextPos;
267
+ };
268
+
269
+ /**
270
+ * @param {number} endPos end position
271
+ * @param {boolean} selfClosing is self closing
272
+ * @returns {number} next position
273
+ */
274
+ const emitOpenTag = (endPos, selfClosing) => {
275
+ let nextPos = endPos;
276
+ if (callbacks.openTag !== undefined) {
277
+ nextPos = callbacks.openTag(
278
+ input,
279
+ tagStart,
280
+ endPos,
281
+ tagNameStart,
282
+ tagNameEnd,
283
+ selfClosing
284
+ );
285
+ }
286
+ if (!selfClosing) {
287
+ lastOpenTagName = input.slice(tagNameStart, tagNameEnd).toLowerCase();
288
+ }
289
+ textStart = nextPos;
290
+ return nextPos;
291
+ };
292
+
293
+ /**
294
+ * @param {number} endPos end position
295
+ * @returns {number} next position
296
+ */
297
+ const emitCloseTag = (endPos) => {
298
+ let nextPos = endPos;
299
+ if (callbacks.closeTag !== undefined) {
300
+ nextPos = callbacks.closeTag(
301
+ input,
302
+ tagStart,
303
+ endPos,
304
+ tagNameStart,
305
+ tagNameEnd
306
+ );
307
+ }
308
+ textStart = nextPos;
309
+ return nextPos;
310
+ };
311
+
312
+ while (pos < len) {
313
+ const cc = input.charCodeAt(pos);
314
+
315
+ // TODO: We don't handle all states here yet. In the future we will need to handle
316
+ // all of them, and when we move all the tokenizer we will remove it.
317
+ switch (state) {
318
+ // https://html.spec.whatwg.org/multipage/parsing.html#data-state
319
+ case STATE_DATA:
320
+ // Consume the next input character:
321
+ // U+003C LESS-THAN SIGN (<)
322
+ // Switch to the tag open state.
323
+ if (cc === CC_LESS_THAN) {
324
+ tagStart = pos;
325
+ state = STATE_TAG_OPEN;
326
+ pos++;
327
+ } else if (cc === CC_AMPERSAND) {
328
+ // U+0026 AMPERSAND (&)
329
+ // Set the return state to the data state. Switch to the
330
+ // character reference state.
331
+ returnState = STATE_DATA;
332
+ state = STATE_CHARACTER_REFERENCE;
333
+ pos++;
334
+ } else {
335
+ pos++;
336
+ }
337
+ break;
338
+
339
+ // https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
340
+ case STATE_TAG_OPEN:
341
+ // Consume the next input character:
342
+ // U+002F SOLIDUS (/)
343
+ // Switch to the end tag open state.
344
+ if (cc === CC_SOLIDUS) {
345
+ state = STATE_END_TAG_OPEN;
346
+ pos++;
347
+ } else if (cc === CC_EXCLAMATION_MARK) {
348
+ // U+0021 EXCLAMATION MARK (!)
349
+ // Switch to the markup declaration open state.
350
+ flushText(tagStart);
351
+ state = STATE_MARKUP_DECLARATION_OPEN;
352
+ pos++;
353
+ } else if (isAsciiAlpha(cc)) {
354
+ // ASCII alpha
355
+ // Create a new start tag token, set its tag name to the empty string.
356
+ // Reconsume in the tag name state.
357
+ flushText(tagStart);
358
+ tagNameStart = pos;
359
+ state = STATE_TAG_NAME;
360
+ // Reconsume
361
+ } else if (cc === CC_QUESTION_MARK) {
362
+ // U+003F QUESTION MARK (?)
363
+ // This is an unexpected-question-mark-instead-of-tag-name parse error.
364
+ // Create a comment token whose data is the empty string. Reconsume in the
365
+ // bogus comment state.
366
+ flushText(tagStart);
367
+ commentStart = tagStart;
368
+ state = STATE_BOGUS_COMMENT;
369
+ pos++;
370
+ // Anything else
371
+ // This is an invalid-first-character-of-tag-name parse error. Emit a U+003C
372
+ // LESS-THAN SIGN character token. Reconsume in the data state.
373
+ } else {
374
+ state = STATE_DATA;
375
+ // Reconsume
376
+ }
377
+ break;
378
+
379
+ // https://html.spec.whatwg.org/multipage/parsing.html#end-tag-open-state
380
+ case STATE_END_TAG_OPEN:
381
+ // Consume the next input character:
382
+ // ASCII alpha
383
+ // Create a new end tag token, set its tag name to the empty string.
384
+ // Reconsume in the tag name state.
385
+ if (isAsciiAlpha(cc)) {
386
+ flushText(tagStart);
387
+ tagNameStart = pos;
388
+ state = STATE_TAG_NAME;
389
+ // Reconsume
390
+ } else if (cc === CC_GREATER_THAN) {
391
+ // U+003E GREATER-THAN SIGN (>)
392
+ // This is a missing-end-tag-name parse error. Switch to the data state.
393
+ state = STATE_DATA;
394
+ pos++;
395
+ } else {
396
+ // Anything else
397
+ // This is an invalid-first-character-of-tag-name parse error. Create a
398
+ // comment token whose data is the empty string. Reconsume in the bogus
399
+ // comment state.
400
+ flushText(tagStart);
401
+ commentStart = tagStart;
402
+ state = STATE_BOGUS_COMMENT;
403
+ pos++;
404
+ }
405
+ break;
406
+
407
+ // https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state
408
+ case STATE_TAG_NAME:
409
+ // Consume the next input character:
410
+ // U+0009 CHARACTER TABULATION (tab)
411
+ // U+000A LINE FEED (LF)
412
+ // U+000C FORM FEED (FF)
413
+ // U+0020 SPACE
414
+ // Switch to the before attribute name state.
415
+ if (isSpace(cc)) {
416
+ tagNameEnd = pos;
417
+ state = STATE_BEFORE_ATTRIBUTE_NAME;
418
+ pos++;
419
+ } else if (cc === CC_SOLIDUS) {
420
+ // U+002F SOLIDUS (/)
421
+ // Switch to the self-closing start tag state.
422
+ tagNameEnd = pos;
423
+ state = STATE_SELF_CLOSING_START_TAG;
424
+ pos++;
425
+ } else if (cc === CC_GREATER_THAN) {
426
+ // U+003E GREATER-THAN SIGN (>)
427
+ // Switch to the data state. Emit the current tag token.
428
+ tagNameEnd = pos;
429
+ if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
430
+ state = STATE_DATA;
431
+ pos = emitCloseTag(pos + 1);
432
+ } else {
433
+ const nextPos = emitOpenTag(pos + 1, false);
434
+ state =
435
+ nextPos > pos + 1
436
+ ? STATE_DATA
437
+ : getContentModeForTag(lastOpenTagName);
438
+ pos = nextPos;
439
+ }
440
+ } else {
441
+ pos++;
442
+ }
443
+ break;
444
+
445
+ // https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
446
+ case STATE_BEFORE_ATTRIBUTE_NAME:
447
+ // Consume the next input character:
448
+ // U+0009 CHARACTER TABULATION (tab)
449
+ // U+000A LINE FEED (LF)
450
+ // U+000C FORM FEED (FF)
451
+ // U+0020 SPACE
452
+ // Ignore the character.
453
+ // (Stay in the before attribute name state and advance past the whitespace.)
454
+ if (isSpace(cc)) {
455
+ pos++;
456
+ } else if (cc === CC_SOLIDUS || cc === CC_GREATER_THAN) {
457
+ // U+002F SOLIDUS (/)
458
+ // U+003E GREATER-THAN SIGN (>)
459
+ // EOF
460
+ // Reconsume in the after attribute name state.
461
+ state = STATE_AFTER_ATTRIBUTE_NAME;
462
+ // Reconsume
463
+ } else if (cc === CC_EQUALS) {
464
+ attrNameStart = pos;
465
+ state = STATE_ATTRIBUTE_NAME;
466
+ pos++;
467
+ } else {
468
+ // Anything else
469
+ // Start a new attribute in the current tag token. Set that attribute name
470
+ // and value to the empty string. Reconsume in the attribute name state.
471
+ attrNameStart = pos;
472
+ state = STATE_ATTRIBUTE_NAME;
473
+ // Reconsume
474
+ }
475
+ break;
476
+
477
+ // https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
478
+ case STATE_ATTRIBUTE_NAME:
479
+ // Consume the next input character:
480
+ // U+0009 CHARACTER TABULATION (tab)
481
+ // U+000A LINE FEED (LF)
482
+ // U+000C FORM FEED (FF)
483
+ // U+0020 SPACE
484
+ // U+002F SOLIDUS (/)
485
+ // U+003E GREATER-THAN SIGN (>)
486
+ // EOF
487
+ // Reconsume in the after attribute name state.
488
+ if (isSpace(cc) || cc === CC_SOLIDUS || cc === CC_GREATER_THAN) {
489
+ attrNameEnd = pos;
490
+ state = STATE_AFTER_ATTRIBUTE_NAME;
491
+ // Reconsume
492
+ } else if (cc === CC_EQUALS) {
493
+ attrNameEnd = pos;
494
+ state = STATE_BEFORE_ATTRIBUTE_VALUE;
495
+ pos++;
496
+ } else {
497
+ pos++;
498
+ }
499
+ break;
500
+
501
+ // https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-name-state
502
+ case STATE_AFTER_ATTRIBUTE_NAME:
503
+ // Consume the next input character:
504
+ // U+0009 CHARACTER TABULATION (tab)
505
+ // U+000A LINE FEED (LF)
506
+ // U+000C FORM FEED (FF)
507
+ // U+0020 SPACE
508
+ // Ignore the character.
509
+ if (isSpace(cc)) {
510
+ pos++;
511
+ } else if (cc === CC_SOLIDUS) {
512
+ // U+002F SOLIDUS (/)
513
+ // Switch to the self-closing start tag state.
514
+ emitAttribute(pos);
515
+ state = STATE_SELF_CLOSING_START_TAG;
516
+ pos++;
517
+ } else if (cc === CC_EQUALS) {
518
+ // U+003D EQUALS SIGN (=)
519
+ // Switch to the before attribute value state.
520
+ state = STATE_BEFORE_ATTRIBUTE_VALUE;
521
+ pos++;
522
+ } else if (cc === CC_GREATER_THAN) {
523
+ // U+003E GREATER-THAN SIGN (>)
524
+ // Switch to the data state. Emit the current tag token.
525
+ emitAttribute(pos);
526
+ if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
527
+ state = STATE_DATA;
528
+ pos = emitCloseTag(pos + 1);
529
+ } else {
530
+ const nextPos = emitOpenTag(pos + 1, false);
531
+ state =
532
+ nextPos > pos + 1
533
+ ? STATE_DATA
534
+ : getContentModeForTag(lastOpenTagName);
535
+ pos = nextPos;
536
+ }
537
+ } else {
538
+ // Anything else
539
+ // Start a new attribute in the current tag token.
540
+ emitAttribute(pos);
541
+ attrNameStart = pos;
542
+ state = STATE_ATTRIBUTE_NAME;
543
+ // Reconsume
544
+ }
545
+ break;
546
+
547
+ // https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-value-state
548
+ case STATE_BEFORE_ATTRIBUTE_VALUE:
549
+ // Consume the next input character:
550
+ // U+0009 CHARACTER TABULATION (tab)
551
+ // U+000A LINE FEED (LF)
552
+ // U+000C FORM FEED (FF)
553
+ // U+0020 SPACE
554
+ // Ignore the character.
555
+ if (isSpace(cc)) {
556
+ pos++;
557
+ } else if (cc === CC_QUOTATION_MARK) {
558
+ // U+0022 QUOTATION MARK (")
559
+ // Switch to the attribute value (double-quoted) state.
560
+ attrValueStart = pos + 1;
561
+ attrQuoteType = QUOTE_DOUBLE;
562
+ state = STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED;
563
+ pos++;
564
+ } else if (cc === CC_APOSTROPHE) {
565
+ // U+0027 APOSTROPHE (')
566
+ // Switch to the attribute value (single-quoted) state.
567
+ attrValueStart = pos + 1;
568
+ attrQuoteType = QUOTE_SINGLE;
569
+ state = STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED;
570
+ pos++;
571
+ } else if (cc === CC_GREATER_THAN) {
572
+ // U+003E GREATER-THAN SIGN (>)
573
+ // This is a missing-attribute-value parse error. Switch to the data state. Emit the current tag token.
574
+ pos = emitAttribute(pos);
575
+ if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
576
+ state = STATE_DATA;
577
+ pos = emitCloseTag(pos + 1);
578
+ } else {
579
+ const nextPos = emitOpenTag(pos + 1, false);
580
+ state =
581
+ nextPos > pos + 1
582
+ ? STATE_DATA
583
+ : getContentModeForTag(lastOpenTagName);
584
+ pos = nextPos;
585
+ }
586
+ } else {
587
+ // Anything else
588
+ // Reconsume in the attribute value (unquoted) state.
589
+ attrValueStart = pos;
590
+ attrQuoteType = QUOTE_NONE;
591
+ state = STATE_ATTRIBUTE_VALUE_UNQUOTED;
592
+ // Reconsume
593
+ }
594
+ break;
595
+
596
+ // https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(double-quoted)-state
597
+ case STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED:
598
+ // Consume the next input character:
599
+ // U+0022 QUOTATION MARK (")
600
+ // Switch to the after attribute value (quoted) state.
601
+ if (cc === CC_QUOTATION_MARK) {
602
+ pos = emitAttribute(pos);
603
+ state = STATE_AFTER_ATTRIBUTE_VALUE_QUOTED;
604
+ } else if (cc === CC_AMPERSAND) {
605
+ // U+0026 AMPERSAND (&)
606
+ // Set the return state to the attribute value (double-quoted)
607
+ // state. Switch to the character reference state.
608
+ returnState = STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED;
609
+ state = STATE_CHARACTER_REFERENCE;
610
+ pos++;
611
+ } else {
612
+ pos++;
613
+ }
614
+ break;
615
+
616
+ // https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(single-quoted)-state
617
+ case STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED:
618
+ // Consume the next input character:
619
+ // U+0027 APOSTROPHE (')
620
+ // Switch to the after attribute value (quoted) state.
621
+ if (cc === CC_APOSTROPHE) {
622
+ pos = emitAttribute(pos);
623
+ state = STATE_AFTER_ATTRIBUTE_VALUE_QUOTED;
624
+ } else if (cc === CC_AMPERSAND) {
625
+ // U+0026 AMPERSAND (&)
626
+ // Set the return state to the attribute value (single-quoted)
627
+ // state. Switch to the character reference state.
628
+ returnState = STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED;
629
+ state = STATE_CHARACTER_REFERENCE;
630
+ pos++;
631
+ } else {
632
+ pos++;
633
+ }
634
+ break;
635
+
636
+ // https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(unquoted)-state
637
+ case STATE_ATTRIBUTE_VALUE_UNQUOTED:
638
+ if (isSpace(cc)) {
639
+ pos = emitAttribute(pos);
640
+ state = STATE_BEFORE_ATTRIBUTE_NAME;
641
+ // Reconsume so space is handled in BEFORE_ATTRIBUTE_NAME
642
+ } else if (cc === CC_GREATER_THAN) {
643
+ // U+003E GREATER-THAN SIGN (>)
644
+ // Switch to the data state.
645
+ // Emit the current tag token.
646
+ pos = emitAttribute(pos);
647
+ if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
648
+ state = STATE_DATA;
649
+ pos = emitCloseTag(pos + 1);
650
+ } else {
651
+ const nextPos = emitOpenTag(pos + 1, false);
652
+ state =
653
+ nextPos > pos + 1
654
+ ? STATE_DATA
655
+ : getContentModeForTag(lastOpenTagName);
656
+ pos = nextPos;
657
+ }
658
+ } else if (cc === CC_AMPERSAND) {
659
+ // U+0026 AMPERSAND (&)
660
+ // Set the return state to the attribute value (unquoted)
661
+ // state. Switch to the character reference state.
662
+ returnState = STATE_ATTRIBUTE_VALUE_UNQUOTED;
663
+ state = STATE_CHARACTER_REFERENCE;
664
+ pos++;
665
+ } else {
666
+ pos++;
667
+ }
668
+ break;
669
+
670
+ // https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-value-(quoted)-state
671
+ case STATE_AFTER_ATTRIBUTE_VALUE_QUOTED:
672
+ // Consume the next input character:
673
+ // U+0009 CHARACTER TABULATION (tab)
674
+ // U+000A LINE FEED (LF)
675
+ // U+000C FORM FEED (FF)
676
+ // U+0020 SPACE
677
+ // Switch to the before attribute name state.
678
+ if (isSpace(cc)) {
679
+ state = STATE_BEFORE_ATTRIBUTE_NAME;
680
+ pos++;
681
+ } else if (cc === CC_SOLIDUS) {
682
+ // U+002F SOLIDUS (/)
683
+ // Switch to the self-closing start tag state.
684
+ state = STATE_SELF_CLOSING_START_TAG;
685
+ pos++;
686
+ } else if (cc === CC_GREATER_THAN) {
687
+ if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
688
+ state = STATE_DATA;
689
+ pos = emitCloseTag(pos + 1);
690
+ } else {
691
+ const nextPos = emitOpenTag(pos + 1, false);
692
+ state =
693
+ nextPos > pos + 1
694
+ ? STATE_DATA
695
+ : getContentModeForTag(lastOpenTagName);
696
+ pos = nextPos;
697
+ }
698
+ } else {
699
+ // Anything else
700
+ // This is a missing-whitespace-between-attributes parse error. Reconsume in
701
+ // the before attribute name state.
702
+ state = STATE_BEFORE_ATTRIBUTE_NAME;
703
+ // Reconsume
704
+ }
705
+ break;
706
+
707
+ // https://html.spec.whatwg.org/multipage/parsing.html#self-closing-start-tag-state
708
+ case STATE_SELF_CLOSING_START_TAG:
709
+ // Consume the next input character:
710
+ // U+003E GREATER-THAN SIGN (>)
711
+ // Set the self-closing flag of the current tag token. Switch to the data
712
+ // state. Emit the current tag token.
713
+ if (cc === CC_GREATER_THAN) {
714
+ if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
715
+ state = STATE_DATA;
716
+ pos = emitCloseTag(pos + 1);
717
+ } else {
718
+ pos = emitOpenTag(pos + 1, true);
719
+ state = STATE_DATA;
720
+ }
721
+ } else {
722
+ // Anything else
723
+ // This is an unexpected-solidus-in-tag parse error. Reconsume in the before
724
+ // attribute name state.
725
+ state = STATE_BEFORE_ATTRIBUTE_NAME;
726
+ // Reconsume
727
+ }
728
+ break;
729
+
730
+ // https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
731
+ case STATE_MARKUP_DECLARATION_OPEN:
732
+ // If the next few characters are:
733
+ // Two U+002D HYPHEN-MINUS characters (-)
734
+ // Consume those two characters, create a comment token whose data
735
+ // is the empty string, and switch to the comment start state.
736
+ if (
737
+ cc === CC_HYPHEN_MINUS &&
738
+ input.charCodeAt(pos + 1) === CC_HYPHEN_MINUS
739
+ ) {
740
+ pos += 2;
741
+ commentStart = tagStart;
742
+ state = STATE_COMMENT_START;
743
+ } else if (
744
+ // ASCII case-insensitive match for the word "DOCTYPE"
745
+ // Consume those characters and switch to the DOCTYPE state.
746
+ (cc === 0x44 || cc === 0x64) /* D or d */ &&
747
+ (input.charCodeAt(pos + 1) | 0x20) === 0x6f /* o */ &&
748
+ (input.charCodeAt(pos + 2) | 0x20) === 0x63 /* c */ &&
749
+ (input.charCodeAt(pos + 3) | 0x20) === 0x74 /* t */ &&
750
+ (input.charCodeAt(pos + 4) | 0x20) === 0x79 /* y */ &&
751
+ (input.charCodeAt(pos + 5) | 0x20) === 0x70 /* p */ &&
752
+ (input.charCodeAt(pos + 6) | 0x20) === 0x65 /* e */
753
+ ) {
754
+ pos += 7;
755
+ commentStart = tagStart;
756
+ state = STATE_DOCTYPE;
757
+ } else if (
758
+ // The string "[CDATA[" (the five uppercase letters "CDATA" with a
759
+ // U+005B LEFT SQUARE BRACKET character before and after)
760
+ // Consume those characters and switch to the CDATA section state.
761
+ cc === CC_LEFT_SQUARE_BRACKET &&
762
+ input.charCodeAt(pos + 1) === 0x43 /* C */ &&
763
+ input.charCodeAt(pos + 2) === 0x44 /* D */ &&
764
+ input.charCodeAt(pos + 3) === 0x41 /* A */ &&
765
+ input.charCodeAt(pos + 4) === 0x54 /* T */ &&
766
+ input.charCodeAt(pos + 5) === 0x41 /* A */ &&
767
+ input.charCodeAt(pos + 6) === CC_LEFT_SQUARE_BRACKET
768
+ ) {
769
+ pos += 7;
770
+ commentStart = tagStart;
771
+ state = STATE_CDATA_SECTION;
772
+ } else {
773
+ // Anything else
774
+ // This is an incorrectly-opened-comment parse error. Create a comment token
775
+ // whose data is the empty string. Switch to the bogus comment state (don't
776
+ // consume anything in the current state).
777
+ commentStart = tagStart;
778
+ state = STATE_BOGUS_COMMENT;
779
+ // Reconsume
780
+ }
781
+ break;
782
+
783
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state
784
+ case STATE_COMMENT_START:
785
+ // Consume the next input character:
786
+ // U+002D HYPHEN-MINUS (-)
787
+ // Switch to the comment start dash state.
788
+ if (cc === CC_HYPHEN_MINUS) {
789
+ state = STATE_COMMENT_START_DASH;
790
+ pos++;
791
+ } else if (cc === CC_GREATER_THAN) {
792
+ // U+003E GREATER-THAN SIGN (>)
793
+ // This is an abrupt-closing-of-empty-comment parse error. Switch to the
794
+ // data state. Emit the current comment token.
795
+ let nextPos = pos + 1;
796
+ if (callbacks.comment !== undefined) {
797
+ nextPos = callbacks.comment(input, commentStart, pos + 1);
798
+ }
799
+ state = STATE_DATA;
800
+ textStart = nextPos;
801
+ pos = nextPos;
802
+ } else {
803
+ // Anything else
804
+ // Reconsume in the comment state.
805
+ state = STATE_COMMENT;
806
+ pos++;
807
+ }
808
+ break;
809
+
810
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-start-dash-state
811
+ case STATE_COMMENT_START_DASH:
812
+ // Consume the next input character:
813
+ // U+002D HYPHEN-MINUS (-)
814
+ // Switch to the comment end state.
815
+ if (cc === CC_HYPHEN_MINUS) {
816
+ state = STATE_COMMENT_END;
817
+ pos++;
818
+ } else if (cc === CC_GREATER_THAN) {
819
+ // U+003E GREATER-THAN SIGN (>)
820
+ // This is an abrupt-closing-of-empty-comment parse error. Switch to the
821
+ // data state. Emit the current comment token.
822
+ let nextPos = pos + 1;
823
+ if (callbacks.comment !== undefined) {
824
+ nextPos = callbacks.comment(input, commentStart, pos + 1);
825
+ }
826
+ state = STATE_DATA;
827
+ textStart = nextPos;
828
+ pos = nextPos;
829
+ } else {
830
+ // Anything else
831
+ // Append a U+002D HYPHEN-MINUS character (-) to the comment token's data.
832
+ // Reconsume in the comment state.
833
+ state = STATE_COMMENT;
834
+ pos++;
835
+ }
836
+ break;
837
+
838
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-state
839
+ case STATE_COMMENT:
840
+ // Consume the next input character:
841
+ // U+003C LESS-THAN SIGN (<)
842
+ // Append a U+003C LESS-THAN SIGN character to the comment token's data. Switch to the comment less-than sign state.
843
+ if (cc === CC_LESS_THAN) {
844
+ state = STATE_COMMENT_LESS_THAN_SIGN;
845
+ pos++;
846
+ } else if (cc === CC_HYPHEN_MINUS) {
847
+ // Consume the next input character:
848
+ // U+002D HYPHEN-MINUS (-)
849
+ // Switch to the comment end dash state.
850
+ state = STATE_COMMENT_END_DASH;
851
+ pos++;
852
+ } else {
853
+ pos++;
854
+ }
855
+ break;
856
+
857
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-end-dash-state
858
+ case STATE_COMMENT_END_DASH:
859
+ // Consume the next input character:
860
+ // U+002D HYPHEN-MINUS (-)
861
+ // Switch to the comment end state.
862
+ if (cc === CC_HYPHEN_MINUS) {
863
+ state = STATE_COMMENT_END;
864
+ pos++;
865
+ } else {
866
+ // Anything else
867
+ // Append a U+002D HYPHEN-MINUS character (-) to the comment token's data.
868
+ // Reconsume in the comment state.
869
+ state = STATE_COMMENT;
870
+ pos++;
871
+ }
872
+ break;
873
+
874
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-end-state
875
+ case STATE_COMMENT_END:
876
+ // Consume the next input character:
877
+ // U+003E GREATER-THAN SIGN (>)
878
+ // Switch to the data state. Emit the current comment token.
879
+ if (cc === CC_GREATER_THAN) {
880
+ let nextPos = pos + 1;
881
+ if (callbacks.comment !== undefined) {
882
+ nextPos = callbacks.comment(input, commentStart, pos + 1);
883
+ }
884
+ state = STATE_DATA;
885
+ textStart = nextPos;
886
+ pos = nextPos;
887
+ } else if (cc === CC_EXCLAMATION_MARK) {
888
+ // U+0021 EXCLAMATION MARK (!)
889
+ // Switch to the comment end bang state.
890
+ state = STATE_COMMENT_END_BANG;
891
+ pos++;
892
+ } else if (cc === CC_HYPHEN_MINUS) {
893
+ pos++;
894
+ } else {
895
+ // Anything else
896
+ // Append two U+002D HYPHEN-MINUS characters (-) to the comment token's
897
+ // data. Reconsume in the comment state.
898
+ state = STATE_COMMENT;
899
+ pos++;
900
+ }
901
+ break;
902
+
903
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-end-bang-state
904
+ case STATE_COMMENT_END_BANG:
905
+ // Consume the next input character:
906
+ // U+002D HYPHEN-MINUS (-)
907
+ // Append two U+002D HYPHEN-MINUS characters (-) and a U+0021 EXCLAMATION
908
+ // MARK character (!) to the comment token's data. Switch to the comment end
909
+ // dash state.
910
+ if (cc === CC_HYPHEN_MINUS) {
911
+ state = STATE_COMMENT_END_DASH;
912
+ pos++;
913
+ } else if (cc === CC_GREATER_THAN) {
914
+ // U+003E GREATER-THAN SIGN (>)
915
+ // This is an incorrectly-closed-comment parse error. Switch to the data
916
+ // state. Emit the current comment token.
917
+ let nextPos = pos + 1;
918
+ if (callbacks.comment !== undefined) {
919
+ nextPos = callbacks.comment(input, commentStart, pos + 1);
920
+ }
921
+ state = STATE_DATA;
922
+ textStart = nextPos;
923
+ pos = nextPos;
924
+ } else {
925
+ // Anything else
926
+ // Append two U+002D HYPHEN-MINUS characters (-) and a U+0021 EXCLAMATION
927
+ // MARK character (!) to the comment token's data. Reconsume in the comment
928
+ // state.
929
+ state = STATE_COMMENT;
930
+ pos++;
931
+ }
932
+ break;
933
+
934
+ // https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state
935
+ case STATE_BOGUS_COMMENT:
936
+ // Consume the next input character:
937
+ // U+003E GREATER-THAN SIGN (>)
938
+ // Switch to the data state. Emit the current comment token.
939
+ if (cc === CC_GREATER_THAN) {
940
+ let nextPos = pos + 1;
941
+ if (callbacks.comment !== undefined) {
942
+ nextPos = callbacks.comment(input, commentStart, pos + 1);
943
+ }
944
+ state = STATE_DATA;
945
+ textStart = nextPos;
946
+ pos = nextPos;
947
+ } else {
948
+ pos++;
949
+ }
950
+ break;
951
+
952
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-state
953
+ case STATE_COMMENT_LESS_THAN_SIGN:
954
+ // Consume the next input character:
955
+ // U+0021 EXCLAMATION MARK (!)
956
+ // Append the current input character to the comment token's data. Switch to
957
+ // the comment less-than sign bang state.
958
+ if (cc === CC_EXCLAMATION_MARK) {
959
+ state = STATE_COMMENT_LESS_THAN_SIGN_BANG;
960
+ pos++;
961
+ } else if (cc === CC_LESS_THAN) {
962
+ // U+003C LESS-THAN SIGN (<)
963
+ // Append the current input character to the comment token's data.
964
+ pos++;
965
+ } else {
966
+ // Anything else
967
+ // Reconsume in the comment state.
968
+ state = STATE_COMMENT;
969
+ // Reconsume
970
+ }
971
+ break;
972
+
973
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-bang-state
974
+ case STATE_COMMENT_LESS_THAN_SIGN_BANG:
975
+ // Consume the next input character:
976
+ // U+002D HYPHEN-MINUS (-)
977
+ // Switch to the comment less-than sign bang dash state.
978
+ if (cc === CC_HYPHEN_MINUS) {
979
+ state = STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH;
980
+ pos++;
981
+ } else {
982
+ // Anything else
983
+ // Reconsume in the comment state.
984
+ state = STATE_COMMENT;
985
+ // Reconsume
986
+ }
987
+ break;
988
+
989
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-bang-dash-state
990
+ case STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH:
991
+ // Consume the next input character:
992
+ // U+002D HYPHEN-MINUS (-)
993
+ // Switch to the comment less-than sign bang dash dash state.
994
+ if (cc === CC_HYPHEN_MINUS) {
995
+ state = STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH;
996
+ pos++;
997
+ } else {
998
+ // Anything else
999
+ // Reconsume in the comment end dash state.
1000
+ state = STATE_COMMENT_END_DASH;
1001
+ // Reconsume
1002
+ }
1003
+ break;
1004
+
1005
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-bang-dash-dash-state
1006
+ case STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH:
1007
+ // Consume the next input character:
1008
+ // U+003E GREATER-THAN SIGN (>)
1009
+ // EOF
1010
+ // Reconsume in the comment end state.
1011
+ // Anything else
1012
+ // This is a nested-comment parse error. Reconsume in the comment end state.
1013
+ state = STATE_COMMENT_END;
1014
+ // Reconsume
1015
+ break;
1016
+
1017
+ // https://html.spec.whatwg.org/multipage/parsing.html#doctype-state
1018
+ case STATE_DOCTYPE:
1019
+ // Consume the next input character:
1020
+ // U+0009 CHARACTER TABULATION (tab)
1021
+ // U+000A LINE FEED (LF)
1022
+ // U+000C FORM FEED (FF)
1023
+ // U+0020 SPACE
1024
+ // Switch to the before DOCTYPE name state.
1025
+ if (isSpace(cc)) {
1026
+ state = STATE_BEFORE_DOCTYPE_NAME;
1027
+ pos++;
1028
+ } else if (cc === CC_GREATER_THAN) {
1029
+ // U+003E GREATER-THAN SIGN (>)
1030
+ // Reconsume in the before DOCTYPE name state.
1031
+ state = STATE_BEFORE_DOCTYPE_NAME;
1032
+ } else {
1033
+ // Anything else
1034
+ // This is a missing-whitespace-before-doctype-name parse error. Reconsume
1035
+ // in the before DOCTYPE name state.
1036
+ state = STATE_BEFORE_DOCTYPE_NAME;
1037
+ }
1038
+ break;
1039
+
1040
+ // https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-name-state
1041
+ case STATE_BEFORE_DOCTYPE_NAME:
1042
+ // Consume the next input character:
1043
+ // U+0009 CHARACTER TABULATION (tab)
1044
+ // U+000A LINE FEED (LF)
1045
+ // U+000C FORM FEED (FF)
1046
+ // U+0020 SPACE
1047
+ // Ignore the character.
1048
+ if (isSpace(cc)) {
1049
+ pos++;
1050
+ } else if (cc === 0x00) {
1051
+ // U+0000 NULL
1052
+ // This is an unexpected-null-character parse error. Create a new DOCTYPE
1053
+ // token. Set the token's name to a U+FFFD REPLACEMENT CHARACTER character.
1054
+ // Switch to the DOCTYPE name state.
1055
+ state = STATE_DOCTYPE_NAME;
1056
+ pos++;
1057
+ } else if (cc === CC_GREATER_THAN) {
1058
+ // U+003E GREATER-THAN SIGN (>)
1059
+ // This is a missing-doctype-name parse error. Create a new DOCTYPE token.
1060
+ // Set its force-quirks flag to on. Switch to the data state. Emit the
1061
+ // current token.
1062
+ let nextPos = pos + 1;
1063
+ if (callbacks.doctype !== undefined) {
1064
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1065
+ }
1066
+ state = STATE_DATA;
1067
+ textStart = nextPos;
1068
+ pos = nextPos;
1069
+ } else {
1070
+ // ASCII upper alpha
1071
+ // Create a new DOCTYPE token. Set the token's name to the lowercase version
1072
+ // of the current input character (add 0x0020 to the character's code
1073
+ // point). Switch to the DOCTYPE name state.
1074
+ // Anything else
1075
+ // Create a new DOCTYPE token. Set the token's name to the current input
1076
+ // character. Switch to the DOCTYPE name state.
1077
+ state = STATE_DOCTYPE_NAME;
1078
+ pos++;
1079
+ }
1080
+ break;
1081
+
1082
+ // https://html.spec.whatwg.org/multipage/parsing.html#doctype-name-state
1083
+ case STATE_DOCTYPE_NAME:
1084
+ // Consume the next input character:
1085
+ // U+0009 CHARACTER TABULATION (tab)
1086
+ // U+000A LINE FEED (LF)
1087
+ // U+000C FORM FEED (FF)
1088
+ // U+0020 SPACE
1089
+ // Switch to the after DOCTYPE name state.
1090
+ if (isSpace(cc)) {
1091
+ state = STATE_AFTER_DOCTYPE_NAME;
1092
+ pos++;
1093
+ } else if (cc === CC_GREATER_THAN) {
1094
+ // U+003E GREATER-THAN SIGN (>)
1095
+ // Switch to the data state. Emit the current DOCTYPE token.
1096
+ let nextPos = pos + 1;
1097
+ if (callbacks.doctype !== undefined) {
1098
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1099
+ }
1100
+ state = STATE_DATA;
1101
+ textStart = nextPos;
1102
+ pos = nextPos;
1103
+ } else if (cc === 0x00) {
1104
+ // U+0000 NULL
1105
+ // This is an unexpected-null-character parse error. Append a U+FFFD
1106
+ // REPLACEMENT CHARACTER character to the current DOCTYPE token's name.
1107
+ pos++;
1108
+ } else {
1109
+ // ASCII upper alpha
1110
+ // Append the lowercase version of the current input character (add 0x0020
1111
+ // to the character's code point) to the current DOCTYPE token's name.
1112
+ // Anything else
1113
+ // Append the current input character to the current DOCTYPE token's name.
1114
+ pos++;
1115
+ }
1116
+ break;
1117
+
1118
+ // https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-name-state
1119
+ case STATE_AFTER_DOCTYPE_NAME:
1120
+ // Consume the next input character:
1121
+ if (isSpace(cc)) {
1122
+ // U+0009 CHARACTER TABULATION (tab)
1123
+ // U+000A LINE FEED (LF)
1124
+ // U+000C FORM FEED (FF)
1125
+ // U+0020 SPACE
1126
+ // Ignore the character.
1127
+ pos++;
1128
+ } else if (cc === CC_GREATER_THAN) {
1129
+ // U+003E GREATER-THAN SIGN (>)
1130
+ // Switch to the data state. Emit the current DOCTYPE token.
1131
+ let nextPos = pos + 1;
1132
+ if (callbacks.doctype !== undefined) {
1133
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1134
+ }
1135
+ state = STATE_DATA;
1136
+ textStart = nextPos;
1137
+ pos = nextPos;
1138
+ } else if (
1139
+ pos + 5 < len &&
1140
+ (cc === 0x50 || cc === 0x70) /* P or p */ &&
1141
+ (input.charCodeAt(pos + 1) | 0x20) === 0x75 /* u */ &&
1142
+ (input.charCodeAt(pos + 2) | 0x20) === 0x62 /* b */ &&
1143
+ (input.charCodeAt(pos + 3) | 0x20) === 0x6c /* l */ &&
1144
+ (input.charCodeAt(pos + 4) | 0x20) === 0x69 /* i */ &&
1145
+ (input.charCodeAt(pos + 5) | 0x20) === 0x63 /* c */
1146
+ ) {
1147
+ // ASCII case-insensitive match for the word "PUBLIC"
1148
+ pos += 6;
1149
+ state = STATE_AFTER_DOCTYPE_PUBLIC_KEYWORD;
1150
+ } else if (
1151
+ pos + 5 < len &&
1152
+ (cc === 0x53 || cc === 0x73) /* S or s */ &&
1153
+ (input.charCodeAt(pos + 1) | 0x20) === 0x79 /* y */ &&
1154
+ (input.charCodeAt(pos + 2) | 0x20) === 0x73 /* s */ &&
1155
+ (input.charCodeAt(pos + 3) | 0x20) === 0x74 /* t */ &&
1156
+ (input.charCodeAt(pos + 4) | 0x20) === 0x65 /* e */ &&
1157
+ (input.charCodeAt(pos + 5) | 0x20) === 0x6d /* m */
1158
+ ) {
1159
+ // ASCII case-insensitive match for the word "SYSTEM"
1160
+ pos += 6;
1161
+ state = STATE_AFTER_DOCTYPE_SYSTEM_KEYWORD;
1162
+ } else {
1163
+ // Anything else
1164
+ // This is an invalid-character-sequence-after-doctype-name parse error. Set
1165
+ // the current DOCTYPE token's force-quirks flag to on. Reconsume in the
1166
+ // bogus DOCTYPE state.
1167
+ state = STATE_BOGUS_DOCTYPE;
1168
+ }
1169
+ break;
1170
+
1171
+ // https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-public-keyword-state
1172
+ case STATE_AFTER_DOCTYPE_PUBLIC_KEYWORD:
1173
+ // Consume the next input character:
1174
+ if (isSpace(cc)) {
1175
+ // U+0009 CHARACTER TABULATION (tab)
1176
+ // U+000A LINE FEED (LF)
1177
+ // U+000C FORM FEED (FF)
1178
+ // U+0020 SPACE
1179
+ // Switch to the before DOCTYPE public identifier state.
1180
+ state = STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
1181
+ pos++;
1182
+ } else if (cc === CC_QUOTATION_MARK) {
1183
+ // U+0022 QUOTATION MARK (")
1184
+ // This is a missing-whitespace-after-doctype-public-keyword parse error.
1185
+ // Set the current DOCTYPE token's public identifier to the empty string
1186
+ // (not missing), then switch to the DOCTYPE public identifier
1187
+ // (double-quoted) state.
1188
+ state = STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
1189
+ pos++;
1190
+ } else if (cc === CC_APOSTROPHE) {
1191
+ // U+0027 APOSTROPHE (')
1192
+ // This is a missing-whitespace-after-doctype-public-keyword parse error.
1193
+ // Set the current DOCTYPE token's public identifier to the empty string
1194
+ // (not missing), then switch to the DOCTYPE public identifier
1195
+ // (single-quoted) state.
1196
+ state = STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
1197
+ pos++;
1198
+ } else if (cc === CC_GREATER_THAN) {
1199
+ // U+003E GREATER-THAN SIGN (>)
1200
+ // This is a missing-doctype-public-identifier parse error. Set the current
1201
+ // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1202
+ // the current DOCTYPE token.
1203
+ let nextPos = pos + 1;
1204
+ if (callbacks.doctype !== undefined) {
1205
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1206
+ }
1207
+ state = STATE_DATA;
1208
+ textStart = nextPos;
1209
+ pos = nextPos;
1210
+ } else {
1211
+ // Anything else
1212
+ // This is a missing-quote-before-doctype-public-identifier parse error. Set
1213
+ // the current DOCTYPE token's force-quirks flag to on. Reconsume in the
1214
+ // bogus DOCTYPE state.
1215
+ state = STATE_BOGUS_DOCTYPE;
1216
+ }
1217
+ break;
1218
+
1219
+ // https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-public-identifier-state
1220
+ case STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
1221
+ // Consume the next input character:
1222
+ if (isSpace(cc)) {
1223
+ // U+0009 CHARACTER TABULATION (tab)
1224
+ // U+000A LINE FEED (LF)
1225
+ // U+000C FORM FEED (FF)
1226
+ // U+0020 SPACE
1227
+ // Ignore the character.
1228
+ pos++;
1229
+ } else if (cc === CC_QUOTATION_MARK) {
1230
+ // U+0022 QUOTATION MARK (")
1231
+ // Set the current DOCTYPE token's public identifier to the empty string
1232
+ // (not missing), then switch to the DOCTYPE public identifier
1233
+ // (double-quoted) state.
1234
+ state = STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
1235
+ pos++;
1236
+ } else if (cc === CC_APOSTROPHE) {
1237
+ // U+0027 APOSTROPHE (')
1238
+ // Set the current DOCTYPE token's public identifier to the empty string
1239
+ // (not missing), then switch to the DOCTYPE public identifier
1240
+ // (single-quoted) state.
1241
+ state = STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
1242
+ pos++;
1243
+ } else if (cc === CC_GREATER_THAN) {
1244
+ // U+003E GREATER-THAN SIGN (>)
1245
+ // This is a missing-doctype-public-identifier parse error. Set the current
1246
+ // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1247
+ // the current DOCTYPE token.
1248
+ let nextPos = pos + 1;
1249
+ if (callbacks.doctype !== undefined) {
1250
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1251
+ }
1252
+ state = STATE_DATA;
1253
+ textStart = nextPos;
1254
+ pos = nextPos;
1255
+ } else {
1256
+ // Anything else
1257
+ // This is a missing-quote-before-doctype-public-identifier parse error. Set
1258
+ // the current DOCTYPE token's force-quirks flag to on. Reconsume in the
1259
+ // bogus DOCTYPE state.
1260
+ state = STATE_BOGUS_DOCTYPE;
1261
+ }
1262
+ break;
1263
+
1264
+ // https://html.spec.whatwg.org/multipage/parsing.html#doctype-public-identifier-(double-quoted)-state
1265
+ case STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
1266
+ // Consume the next input character:
1267
+ if (cc === CC_QUOTATION_MARK) {
1268
+ // U+0022 QUOTATION MARK (")
1269
+ // Switch to the after DOCTYPE public identifier state.
1270
+ state = STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
1271
+ pos++;
1272
+ } else if (cc === 0x00) {
1273
+ // U+0000 NULL
1274
+ // This is an unexpected-null-character parse error. Append a U+FFFD
1275
+ // REPLACEMENT CHARACTER character to the current DOCTYPE token's public
1276
+ // identifier.
1277
+ pos++;
1278
+ } else if (cc === CC_GREATER_THAN) {
1279
+ // U+003E GREATER-THAN SIGN (>)
1280
+ // This is an abrupt-doctype-public-identifier parse error. Set the current
1281
+ // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1282
+ // the current DOCTYPE token.
1283
+ let nextPos = pos + 1;
1284
+ if (callbacks.doctype !== undefined) {
1285
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1286
+ }
1287
+ state = STATE_DATA;
1288
+ textStart = nextPos;
1289
+ pos = nextPos;
1290
+ } else {
1291
+ // Anything else
1292
+ // Append the current input character to the current DOCTYPE token's public
1293
+ // identifier.
1294
+ pos++;
1295
+ }
1296
+ break;
1297
+
1298
+ // https://html.spec.whatwg.org/multipage/parsing.html#doctype-public-identifier-(single-quoted)-state
1299
+ case STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
1300
+ // Consume the next input character:
1301
+ if (cc === CC_APOSTROPHE) {
1302
+ // U+0027 APOSTROPHE (')
1303
+ // Switch to the after DOCTYPE public identifier state.
1304
+ state = STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
1305
+ pos++;
1306
+ } else if (cc === 0x00) {
1307
+ // U+0000 NULL
1308
+ // This is an unexpected-null-character parse error. Append a U+FFFD
1309
+ // REPLACEMENT CHARACTER character to the current DOCTYPE token's public
1310
+ // identifier.
1311
+ pos++;
1312
+ } else if (cc === CC_GREATER_THAN) {
1313
+ // U+003E GREATER-THAN SIGN (>)
1314
+ // This is an abrupt-doctype-public-identifier parse error. Set the current
1315
+ // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1316
+ // the current DOCTYPE token.
1317
+ let nextPos = pos + 1;
1318
+ if (callbacks.doctype !== undefined) {
1319
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1320
+ }
1321
+ state = STATE_DATA;
1322
+ textStart = nextPos;
1323
+ pos = nextPos;
1324
+ } else {
1325
+ // Anything else
1326
+ // Append the current input character to the current DOCTYPE token's public
1327
+ // identifier.
1328
+ pos++;
1329
+ }
1330
+ break;
1331
+
1332
+ // https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-public-identifier-state
1333
+ case STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
1334
+ // Consume the next input character:
1335
+ if (isSpace(cc)) {
1336
+ // U+0009 CHARACTER TABULATION (tab)
1337
+ // U+000A LINE FEED (LF)
1338
+ // U+000C FORM FEED (FF)
1339
+ // U+0020 SPACE
1340
+ // Switch to the between DOCTYPE public and system identifiers state.
1341
+ state = STATE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;
1342
+ pos++;
1343
+ } else if (cc === CC_GREATER_THAN) {
1344
+ // U+003E GREATER-THAN SIGN (>)
1345
+ // Switch to the data state. Emit the current DOCTYPE token.
1346
+ let nextPos = pos + 1;
1347
+ if (callbacks.doctype !== undefined) {
1348
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1349
+ }
1350
+ state = STATE_DATA;
1351
+ textStart = nextPos;
1352
+ pos = nextPos;
1353
+ } else if (cc === CC_QUOTATION_MARK) {
1354
+ // U+0022 QUOTATION MARK (")
1355
+ // This is a missing-whitespace-between-doctype-public-and-system-identifiers
1356
+ // parse error. Set the current DOCTYPE token's system
1357
+ // identifier to the empty string (not missing), then switch
1358
+ // to the DOCTYPE system identifier (double-quoted) state.
1359
+ state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
1360
+ pos++;
1361
+ } else if (cc === CC_APOSTROPHE) {
1362
+ // U+0027 APOSTROPHE (')
1363
+ // This is a missing-whitespace-between-doctype-public-and-system-identifiers
1364
+ // parse error. Set the current DOCTYPE token's system
1365
+ // identifier to the empty string (not missing), then switch
1366
+ // to the DOCTYPE system identifier (single-quoted) state.
1367
+ state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
1368
+ pos++;
1369
+ } else {
1370
+ // Anything else
1371
+ // This is a missing-quote-before-doctype-system-identifier parse error. Set
1372
+ // the current DOCTYPE token's force-quirks flag to on. Reconsume in the
1373
+ // bogus DOCTYPE state.
1374
+ state = STATE_BOGUS_DOCTYPE;
1375
+ }
1376
+ break;
1377
+
1378
+ // https://html.spec.whatwg.org/multipage/parsing.html#between-doctype-public-and-system-identifiers-state
1379
+ case STATE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
1380
+ // Consume the next input character:
1381
+ if (isSpace(cc)) {
1382
+ // U+0009 CHARACTER TABULATION (tab)
1383
+ // U+000A LINE FEED (LF)
1384
+ // U+000C FORM FEED (FF)
1385
+ // U+0020 SPACE
1386
+ // Ignore the character.
1387
+ pos++;
1388
+ } else if (cc === CC_GREATER_THAN) {
1389
+ // U+003E GREATER-THAN SIGN (>)
1390
+ // Switch to the data state. Emit the current DOCTYPE token.
1391
+ let nextPos = pos + 1;
1392
+ if (callbacks.doctype !== undefined) {
1393
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1394
+ }
1395
+ state = STATE_DATA;
1396
+ textStart = nextPos;
1397
+ pos = nextPos;
1398
+ } else if (cc === CC_QUOTATION_MARK) {
1399
+ // U+0022 QUOTATION MARK (")
1400
+ // Set the current DOCTYPE token's system identifier to the empty string
1401
+ // (not missing), then switch to the DOCTYPE system identifier
1402
+ // (double-quoted) state.
1403
+ state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
1404
+ pos++;
1405
+ } else if (cc === CC_APOSTROPHE) {
1406
+ // U+0027 APOSTROPHE (')
1407
+ // Set the current DOCTYPE token's system identifier to the empty string
1408
+ // (not missing), then switch to the DOCTYPE system identifier
1409
+ // (single-quoted) state.
1410
+ state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
1411
+ pos++;
1412
+ } else {
1413
+ // Anything else
1414
+ // This is a missing-quote-before-doctype-system-identifier parse error. Set
1415
+ // the current DOCTYPE token's force-quirks flag to on. Reconsume in the
1416
+ // bogus DOCTYPE state.
1417
+ state = STATE_BOGUS_DOCTYPE;
1418
+ }
1419
+ break;
1420
+
1421
+ // https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-system-keyword-state
1422
+ case STATE_AFTER_DOCTYPE_SYSTEM_KEYWORD:
1423
+ // Consume the next input character:
1424
+ if (isSpace(cc)) {
1425
+ // U+0009 CHARACTER TABULATION (tab)
1426
+ // U+000A LINE FEED (LF)
1427
+ // U+000C FORM FEED (FF)
1428
+ // U+0020 SPACE
1429
+ // Switch to the before DOCTYPE system identifier state.
1430
+ state = STATE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;
1431
+ pos++;
1432
+ } else if (cc === CC_QUOTATION_MARK) {
1433
+ // U+0022 QUOTATION MARK (")
1434
+ // This is a missing-whitespace-after-doctype-system-keyword parse error.
1435
+ // Set the current DOCTYPE token's system identifier to the empty string
1436
+ // (not missing), then switch to the DOCTYPE system identifier
1437
+ // (double-quoted) state.
1438
+ state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
1439
+ pos++;
1440
+ } else if (cc === CC_APOSTROPHE) {
1441
+ // U+0027 APOSTROPHE (')
1442
+ // This is a missing-whitespace-after-doctype-system-keyword parse error.
1443
+ // Set the current DOCTYPE token's system identifier to the empty string
1444
+ // (not missing), then switch to the DOCTYPE system identifier
1445
+ // (single-quoted) state.
1446
+ state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
1447
+ pos++;
1448
+ } else if (cc === CC_GREATER_THAN) {
1449
+ // U+003E GREATER-THAN SIGN (>)
1450
+ // This is a missing-doctype-system-identifier parse error. Set the current
1451
+ // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1452
+ // the current DOCTYPE token.
1453
+ let nextPos = pos + 1;
1454
+ if (callbacks.doctype !== undefined) {
1455
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1456
+ }
1457
+ state = STATE_DATA;
1458
+ textStart = nextPos;
1459
+ pos = nextPos;
1460
+ } else {
1461
+ // Anything else
1462
+ // This is a missing-quote-before-doctype-system-identifier parse error. Set
1463
+ // the current DOCTYPE token's force-quirks flag to on. Reconsume in the
1464
+ // bogus DOCTYPE state.
1465
+ state = STATE_BOGUS_DOCTYPE;
1466
+ }
1467
+ break;
1468
+
1469
+ // https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-system-identifier-state
1470
+ case STATE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
1471
+ // Consume the next input character:
1472
+ if (isSpace(cc)) {
1473
+ // U+0009 CHARACTER TABULATION (tab)
1474
+ // U+000A LINE FEED (LF)
1475
+ // U+000C FORM FEED (FF)
1476
+ // U+0020 SPACE
1477
+ // Ignore the character.
1478
+ pos++;
1479
+ } else if (cc === CC_QUOTATION_MARK) {
1480
+ // U+0022 QUOTATION MARK (")
1481
+ // Set the current DOCTYPE token's system identifier to the empty string
1482
+ // (not missing), then switch to the DOCTYPE system identifier
1483
+ // (double-quoted) state.
1484
+ state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
1485
+ pos++;
1486
+ } else if (cc === CC_APOSTROPHE) {
1487
+ // U+0027 APOSTROPHE (')
1488
+ // Set the current DOCTYPE token's system identifier to the empty string
1489
+ // (not missing), then switch to the DOCTYPE system identifier
1490
+ // (single-quoted) state.
1491
+ state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
1492
+ pos++;
1493
+ } else if (cc === CC_GREATER_THAN) {
1494
+ // U+003E GREATER-THAN SIGN (>)
1495
+ // This is a missing-doctype-system-identifier parse error. Set the current
1496
+ // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1497
+ // the current DOCTYPE token.
1498
+ let nextPos = pos + 1;
1499
+ if (callbacks.doctype !== undefined) {
1500
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1501
+ }
1502
+ state = STATE_DATA;
1503
+ textStart = nextPos;
1504
+ pos = nextPos;
1505
+ } else {
1506
+ // Anything else
1507
+ // This is a missing-quote-before-doctype-system-identifier parse error. Set
1508
+ // the current DOCTYPE token's force-quirks flag to on. Reconsume in the
1509
+ // bogus DOCTYPE state.
1510
+ state = STATE_BOGUS_DOCTYPE;
1511
+ }
1512
+ break;
1513
+
1514
+ // https://html.spec.whatwg.org/multipage/parsing.html#doctype-system-identifier-(double-quoted)-state
1515
+ case STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
1516
+ // Consume the next input character:
1517
+ if (cc === CC_QUOTATION_MARK) {
1518
+ // U+0022 QUOTATION MARK (")
1519
+ // Switch to the after DOCTYPE system identifier state.
1520
+ state = STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
1521
+ pos++;
1522
+ } else if (cc === 0x00) {
1523
+ // U+0000 NULL
1524
+ // This is an unexpected-null-character parse error. Append a U+FFFD
1525
+ // REPLACEMENT CHARACTER character to the current DOCTYPE token's system
1526
+ // identifier.
1527
+ pos++;
1528
+ } else if (cc === CC_GREATER_THAN) {
1529
+ // U+003E GREATER-THAN SIGN (>)
1530
+ // This is an abrupt-doctype-system-identifier parse error. Set the current
1531
+ // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1532
+ // the current DOCTYPE token.
1533
+ let nextPos = pos + 1;
1534
+ if (callbacks.doctype !== undefined) {
1535
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1536
+ }
1537
+ state = STATE_DATA;
1538
+ textStart = nextPos;
1539
+ pos = nextPos;
1540
+ } else {
1541
+ // Anything else
1542
+ // Append the current input character to the current DOCTYPE token's system
1543
+ // identifier.
1544
+ pos++;
1545
+ }
1546
+ break;
1547
+
1548
+ // https://html.spec.whatwg.org/multipage/parsing.html#doctype-system-identifier-(single-quoted)-state
1549
+ case STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
1550
+ // Consume the next input character:
1551
+ if (cc === CC_APOSTROPHE) {
1552
+ // U+0027 APOSTROPHE (')
1553
+ // Switch to the after DOCTYPE system identifier state.
1554
+ state = STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
1555
+ pos++;
1556
+ } else if (cc === 0x00) {
1557
+ // U+0000 NULL
1558
+ // This is an unexpected-null-character parse error. Append a U+FFFD
1559
+ // REPLACEMENT CHARACTER character to the current DOCTYPE token's system
1560
+ // identifier.
1561
+ pos++;
1562
+ } else if (cc === CC_GREATER_THAN) {
1563
+ // U+003E GREATER-THAN SIGN (>)
1564
+ // This is an abrupt-doctype-system-identifier parse error. Set the current
1565
+ // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
1566
+ // the current DOCTYPE token.
1567
+ let nextPos = pos + 1;
1568
+ if (callbacks.doctype !== undefined) {
1569
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1570
+ }
1571
+ state = STATE_DATA;
1572
+ textStart = nextPos;
1573
+ pos = nextPos;
1574
+ } else {
1575
+ // Anything else
1576
+ // Append the current input character to the current DOCTYPE token's system
1577
+ // identifier.
1578
+ pos++;
1579
+ }
1580
+ break;
1581
+
1582
+ // https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-system-identifier-state
1583
+ case STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
1584
+ // Consume the next input character:
1585
+ if (isSpace(cc)) {
1586
+ // U+0009 CHARACTER TABULATION (tab)
1587
+ // U+000A LINE FEED (LF)
1588
+ // U+000C FORM FEED (FF)
1589
+ // U+0020 SPACE
1590
+ // Ignore the character.
1591
+ pos++;
1592
+ } else if (cc === CC_GREATER_THAN) {
1593
+ // U+003E GREATER-THAN SIGN (>)
1594
+ // Switch to the data state. Emit the current DOCTYPE token.
1595
+ let nextPos = pos + 1;
1596
+ if (callbacks.doctype !== undefined) {
1597
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1598
+ }
1599
+ state = STATE_DATA;
1600
+ textStart = nextPos;
1601
+ pos = nextPos;
1602
+ } else {
1603
+ // Anything else
1604
+ // This is an unexpected-character-after-doctype-system-identifier parse
1605
+ // error. Reconsume in the bogus DOCTYPE state. (This does not set the
1606
+ // current DOCTYPE token's force-quirks flag to on.)
1607
+ state = STATE_BOGUS_DOCTYPE;
1608
+ }
1609
+ break;
1610
+
1611
+ // https://html.spec.whatwg.org/multipage/parsing.html#bogus-doctype-state
1612
+ case STATE_BOGUS_DOCTYPE:
1613
+ // Consume the next input character:
1614
+ if (cc === CC_GREATER_THAN) {
1615
+ // U+003E GREATER-THAN SIGN (>)
1616
+ // Switch to the data state. Emit the DOCTYPE token.
1617
+ let nextPos = pos + 1;
1618
+ if (callbacks.doctype !== undefined) {
1619
+ nextPos = callbacks.doctype(input, commentStart, pos + 1);
1620
+ }
1621
+ state = STATE_DATA;
1622
+ textStart = nextPos;
1623
+ pos = nextPos;
1624
+ } else if (cc === 0x00) {
1625
+ // U+0000 NULL
1626
+ // This is an unexpected-null-character parse error. Ignore the character.
1627
+ pos++;
1628
+ } else {
1629
+ // Anything else
1630
+ // Ignore the character.
1631
+ pos++;
1632
+ }
1633
+ break;
1634
+
1635
+ // https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-state
1636
+ case STATE_CDATA_SECTION:
1637
+ // Consume the next input character:
1638
+ // U+005D RIGHT SQUARE BRACKET (])
1639
+ // Switch to the CDATA section bracket state.
1640
+ if (cc === CC_RIGHT_SQUARE_BRACKET) {
1641
+ state = STATE_CDATA_SECTION_BRACKET;
1642
+ pos++;
1643
+ } else {
1644
+ // Anything else
1645
+ // Emit the current input character as a character token.
1646
+ pos++;
1647
+ }
1648
+ break;
1649
+
1650
+ // https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-bracket-state
1651
+ case STATE_CDATA_SECTION_BRACKET:
1652
+ // Consume the next input character:
1653
+ // U+005D RIGHT SQUARE BRACKET (])
1654
+ // Switch to the CDATA section end state.
1655
+ if (cc === CC_RIGHT_SQUARE_BRACKET) {
1656
+ state = STATE_CDATA_SECTION_END;
1657
+ pos++;
1658
+ } else {
1659
+ // Anything else
1660
+ // Emit a U+005D RIGHT SQUARE BRACKET character token. Reconsume in the
1661
+ // CDATA section state.
1662
+ state = STATE_CDATA_SECTION;
1663
+ // Reconsume
1664
+ }
1665
+ break;
1666
+
1667
+ // https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-end-state
1668
+ case STATE_CDATA_SECTION_END:
1669
+ // Consume the next input character:
1670
+ // U+005D RIGHT SQUARE BRACKET (])
1671
+ // Emit a U+005D RIGHT SQUARE BRACKET character token.
1672
+ if (cc === CC_RIGHT_SQUARE_BRACKET) {
1673
+ pos++;
1674
+ } else if (cc === CC_GREATER_THAN) {
1675
+ // U+003E GREATER-THAN SIGN (>)
1676
+ // Switch to the data state.
1677
+ let nextPos = pos + 1;
1678
+ if (callbacks.comment !== undefined) {
1679
+ nextPos = callbacks.comment(input, commentStart, pos + 1);
1680
+ }
1681
+ state = STATE_DATA;
1682
+ textStart = nextPos;
1683
+ pos = nextPos;
1684
+ } else {
1685
+ // Anything else
1686
+ // Emit two U+005D RIGHT SQUARE BRACKET character tokens. Reconsume in the
1687
+ // CDATA section state.
1688
+ state = STATE_CDATA_SECTION;
1689
+ // Reconsume
1690
+ }
1691
+ break;
1692
+
1693
+ // https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state
1694
+ case STATE_RCDATA:
1695
+ // Consume the next input character:
1696
+ // U+003C LESS-THAN SIGN (<)
1697
+ // Switch to the RCDATA less-than sign state.
1698
+ if (cc === CC_LESS_THAN) {
1699
+ tagStart = pos;
1700
+ state = STATE_RCDATA_LESS_THAN_SIGN;
1701
+ pos++;
1702
+ } else {
1703
+ // Anything else
1704
+ // Emit the current input character as a character token.
1705
+ pos++;
1706
+ }
1707
+ break;
1708
+
1709
+ // https://html.spec.whatwg.org/multipage/parsing.html#rcdata-less-than-sign-state
1710
+ case STATE_RCDATA_LESS_THAN_SIGN:
1711
+ // Consume the next input character:
1712
+ // U+002F SOLIDUS (/)
1713
+ // Set the temporary buffer to the empty string. Switch to the RCDATA end
1714
+ // tag open state.
1715
+ if (cc === CC_SOLIDUS) {
1716
+ tempBuffer = "";
1717
+ state = STATE_RCDATA_END_TAG_OPEN;
1718
+ pos++;
1719
+ } else {
1720
+ // Anything else
1721
+ // Emit a U+003C LESS-THAN SIGN character token. Reconsume in the RCDATA
1722
+ // state.
1723
+ state = STATE_RCDATA;
1724
+ // Reconsume
1725
+ }
1726
+ break;
1727
+
1728
+ // https://html.spec.whatwg.org/multipage/parsing.html#rcdata-end-tag-open-state
1729
+ case STATE_RCDATA_END_TAG_OPEN:
1730
+ // Consume the next input character:
1731
+ // ASCII alpha
1732
+ // Create a new end tag token, set its tag name to the empty string.
1733
+ // Reconsume in the RCDATA end tag name state.
1734
+ if (isAsciiAlpha(cc)) {
1735
+ tagNameStart = pos;
1736
+ state = STATE_RCDATA_END_TAG_NAME;
1737
+ // Reconsume
1738
+ } else {
1739
+ // Anything else
1740
+ // Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS
1741
+ // character token. Reconsume in the RCDATA state.
1742
+ state = STATE_RCDATA;
1743
+ // Reconsume
1744
+ }
1745
+ break;
1746
+
1747
+ // https://html.spec.whatwg.org/multipage/parsing.html#rcdata-end-tag-name-state
1748
+ case STATE_RCDATA_END_TAG_NAME:
1749
+ // Consume the next input character:
1750
+ // U+0009 CHARACTER TABULATION (tab)
1751
+ // U+000A LINE FEED (LF)
1752
+ // U+000C FORM FEED (FF)
1753
+ // U+0020 SPACE
1754
+ // If the current end tag token is an appropriate end tag token, then switch
1755
+ // to the before attribute name state. Otherwise, treat it as per the
1756
+ // "anything else" entry below.
1757
+ if (isSpace(cc)) {
1758
+ tagNameEnd = pos;
1759
+ if (
1760
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
1761
+ lastOpenTagName
1762
+ ) {
1763
+ state = STATE_BEFORE_ATTRIBUTE_NAME;
1764
+ pos++;
1765
+ } else {
1766
+ state = STATE_RCDATA;
1767
+ // Reconsume
1768
+ }
1769
+ } else if (cc === CC_SOLIDUS) {
1770
+ // U+002F SOLIDUS (/)
1771
+ // If the current end tag token is an appropriate end tag token, then switch
1772
+ // to the self-closing start tag state. Otherwise, treat it as per the
1773
+ // "anything else" entry below.
1774
+ tagNameEnd = pos;
1775
+ if (
1776
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
1777
+ lastOpenTagName
1778
+ ) {
1779
+ state = STATE_SELF_CLOSING_START_TAG;
1780
+ pos++;
1781
+ } else {
1782
+ state = STATE_RCDATA;
1783
+ // Reconsume
1784
+ }
1785
+ } else if (cc === CC_GREATER_THAN) {
1786
+ // U+003E GREATER-THAN SIGN (>)
1787
+ // If the current end tag token is an appropriate end tag token, then switch
1788
+ // to the data state and emit the current tag token. Otherwise, treat it as
1789
+ // per the "anything else" entry below.
1790
+ tagNameEnd = pos;
1791
+ if (
1792
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
1793
+ lastOpenTagName
1794
+ ) {
1795
+ flushText(tagStart);
1796
+ state = STATE_DATA;
1797
+ pos = emitCloseTag(pos + 1);
1798
+ } else {
1799
+ state = STATE_RCDATA;
1800
+ // Reconsume
1801
+ }
1802
+ } else if (isAsciiAlpha(cc)) {
1803
+ // ASCII upper alpha / ASCII lower alpha
1804
+ // Append the lowercase version of the current input character to the
1805
+ // current tag token's tag name. Append the current input character to
1806
+ // the temporary buffer.
1807
+ pos++;
1808
+ } else {
1809
+ // Anything else
1810
+ // Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
1811
+ // token, and a character token for each of the characters in the temporary
1812
+ // buffer (in the order they were added to the buffer). Reconsume in the
1813
+ // RCDATA state.
1814
+ state = STATE_RCDATA;
1815
+ // Reconsume
1816
+ }
1817
+ break;
1818
+
1819
+ // https://html.spec.whatwg.org/multipage/parsing.html#rawtext-state
1820
+ case STATE_RAWTEXT:
1821
+ // Consume the next input character:
1822
+ // U+003C LESS-THAN SIGN (<)
1823
+ // Switch to the RAWTEXT less-than sign state.
1824
+ if (cc === CC_LESS_THAN) {
1825
+ tagStart = pos;
1826
+ state = STATE_RAWTEXT_LESS_THAN_SIGN;
1827
+ pos++;
1828
+ } else {
1829
+ pos++;
1830
+ }
1831
+ break;
1832
+
1833
+ // https://html.spec.whatwg.org/multipage/parsing.html#rawtext-less-than-sign-state
1834
+ case STATE_RAWTEXT_LESS_THAN_SIGN:
1835
+ // Consume the next input character:
1836
+ // U+002F SOLIDUS (/)
1837
+ // Set the temporary buffer to the empty string. Switch to the RAWTEXT end
1838
+ // tag open state.
1839
+ if (cc === CC_SOLIDUS) {
1840
+ tempBuffer = "";
1841
+ state = STATE_RAWTEXT_END_TAG_OPEN;
1842
+ pos++;
1843
+ } else {
1844
+ // Anything else
1845
+ // Emit a U+003C LESS-THAN SIGN character token. Reconsume in the RAWTEXT
1846
+ // state.
1847
+ state = STATE_RAWTEXT;
1848
+ // Reconsume
1849
+ }
1850
+ break;
1851
+
1852
+ // https://html.spec.whatwg.org/multipage/parsing.html#rawtext-end-tag-open-state
1853
+ case STATE_RAWTEXT_END_TAG_OPEN:
1854
+ // Consume the next input character:
1855
+ // ASCII alpha
1856
+ // Create a new end tag token, set its tag name to the empty string.
1857
+ // Reconsume in the RAWTEXT end tag name state.
1858
+ if (isAsciiAlpha(cc)) {
1859
+ tagNameStart = pos;
1860
+ state = STATE_RAWTEXT_END_TAG_NAME;
1861
+ // Reconsume
1862
+ } else {
1863
+ // Anything else
1864
+ // Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS
1865
+ // character token. Reconsume in the RAWTEXT state.
1866
+ state = STATE_RAWTEXT;
1867
+ // Reconsume
1868
+ }
1869
+ break;
1870
+
1871
+ // https://html.spec.whatwg.org/multipage/parsing.html#rawtext-end-tag-name-state
1872
+ case STATE_RAWTEXT_END_TAG_NAME:
1873
+ // Consume the next input character:
1874
+ // U+0009 CHARACTER TABULATION (tab)
1875
+ // U+000A LINE FEED (LF)
1876
+ // U+000C FORM FEED (FF)
1877
+ // U+0020 SPACE
1878
+ // If the current end tag token is an appropriate end tag token, then switch
1879
+ // to the before attribute name state. Otherwise, treat it as per the
1880
+ // "anything else" entry below.
1881
+ if (isSpace(cc)) {
1882
+ tagNameEnd = pos;
1883
+ if (
1884
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
1885
+ lastOpenTagName
1886
+ ) {
1887
+ state = STATE_BEFORE_ATTRIBUTE_NAME;
1888
+ pos++;
1889
+ } else {
1890
+ state = STATE_RAWTEXT;
1891
+ }
1892
+ } else if (cc === CC_SOLIDUS) {
1893
+ // U+002F SOLIDUS (/)
1894
+ // If the current end tag token is an appropriate end tag token, then switch
1895
+ // to the self-closing start tag state. Otherwise, treat it as per the
1896
+ // "anything else" entry below.
1897
+ tagNameEnd = pos;
1898
+ if (
1899
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
1900
+ lastOpenTagName
1901
+ ) {
1902
+ state = STATE_SELF_CLOSING_START_TAG;
1903
+ pos++;
1904
+ } else {
1905
+ state = STATE_RAWTEXT;
1906
+ }
1907
+ } else if (cc === CC_GREATER_THAN) {
1908
+ // U+003E GREATER-THAN SIGN (>)
1909
+ // If the current end tag token is an appropriate end tag token, then switch
1910
+ // to the data state and emit the current tag token. Otherwise, treat it as
1911
+ // per the "anything else" entry below.
1912
+ tagNameEnd = pos;
1913
+ if (
1914
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
1915
+ lastOpenTagName
1916
+ ) {
1917
+ flushText(tagStart);
1918
+ state = STATE_DATA;
1919
+ pos = emitCloseTag(pos + 1);
1920
+ } else {
1921
+ state = STATE_RAWTEXT;
1922
+ }
1923
+ } else if (isAsciiAlpha(cc)) {
1924
+ // ASCII upper alpha / ASCII lower alpha
1925
+ // Append the lowercase version of the current input character to the
1926
+ // current tag token's tag name. Append the current input character to
1927
+ // the temporary buffer.
1928
+ pos++;
1929
+ } else {
1930
+ // Anything else
1931
+ // Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
1932
+ // token, and a character token for each of the characters in the temporary
1933
+ // buffer (in the order they were added to the buffer). Reconsume in the
1934
+ // RAWTEXT state.
1935
+ state = STATE_RAWTEXT;
1936
+ // Reconsume
1937
+ }
1938
+ break;
1939
+
1940
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-state
1941
+ case STATE_SCRIPT_DATA:
1942
+ // Consume the next input character:
1943
+ // U+003C LESS-THAN SIGN (<)
1944
+ // Switch to the script data less-than sign state.
1945
+ if (cc === CC_LESS_THAN) {
1946
+ tagStart = pos;
1947
+ state = STATE_SCRIPT_DATA_LESS_THAN_SIGN;
1948
+ pos++;
1949
+ } else {
1950
+ pos++;
1951
+ }
1952
+ break;
1953
+
1954
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-less-than-sign-state
1955
+ case STATE_SCRIPT_DATA_LESS_THAN_SIGN:
1956
+ // Consume the next input character:
1957
+ // U+002F SOLIDUS (/)
1958
+ // Set the temporary buffer to the empty string. Switch to the script data
1959
+ // end tag open state.
1960
+ if (cc === CC_SOLIDUS) {
1961
+ tempBuffer = "";
1962
+ state = STATE_SCRIPT_DATA_END_TAG_OPEN;
1963
+ pos++;
1964
+ } else if (cc === CC_EXCLAMATION_MARK) {
1965
+ // U+0021 EXCLAMATION MARK (!)
1966
+ // Switch to the script data escape start state. Emit a U+003C LESS-THAN
1967
+ // SIGN character token and a U+0021 EXCLAMATION MARK character token.
1968
+ state = STATE_SCRIPT_DATA_ESCAPE_START;
1969
+ pos++;
1970
+ } else {
1971
+ // Anything else
1972
+ // Emit a U+003C LESS-THAN SIGN character token. Reconsume in the script
1973
+ // data state.
1974
+ state = STATE_SCRIPT_DATA;
1975
+ // Reconsume
1976
+ }
1977
+ break;
1978
+
1979
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-end-tag-open-state
1980
+ case STATE_SCRIPT_DATA_END_TAG_OPEN:
1981
+ // Consume the next input character:
1982
+ // ASCII alpha
1983
+ // Create a new end tag token, set its tag name to the empty string.
1984
+ // Reconsume in the script data end tag name state.
1985
+ if (isAsciiAlpha(cc)) {
1986
+ tagNameStart = pos;
1987
+ state = STATE_SCRIPT_DATA_END_TAG_NAME;
1988
+ // Reconsume
1989
+ } else {
1990
+ // Anything else
1991
+ // Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS
1992
+ // character token. Reconsume in the script data state.
1993
+ state = STATE_SCRIPT_DATA;
1994
+ // Reconsume
1995
+ }
1996
+ break;
1997
+
1998
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-end-tag-name-state
1999
+ case STATE_SCRIPT_DATA_END_TAG_NAME:
2000
+ // Consume the next input character:
2001
+ // U+0009 CHARACTER TABULATION (tab)
2002
+ // U+000A LINE FEED (LF)
2003
+ // U+000C FORM FEED (FF)
2004
+ // U+0020 SPACE
2005
+ // If the current end tag token is an appropriate end tag token, then switch
2006
+ // to the before attribute name state. Otherwise, treat it as per the
2007
+ // "anything else" entry below.
2008
+ if (isSpace(cc)) {
2009
+ tagNameEnd = pos;
2010
+ if (
2011
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
2012
+ lastOpenTagName
2013
+ ) {
2014
+ state = STATE_BEFORE_ATTRIBUTE_NAME;
2015
+ pos++;
2016
+ } else {
2017
+ state = STATE_SCRIPT_DATA;
2018
+ }
2019
+ } else if (cc === CC_SOLIDUS) {
2020
+ // U+002F SOLIDUS (/)
2021
+ // If the current end tag token is an appropriate end tag token, then switch
2022
+ // to the self-closing start tag state. Otherwise, treat it as per the
2023
+ // "anything else" entry below.
2024
+ tagNameEnd = pos;
2025
+ if (
2026
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
2027
+ lastOpenTagName
2028
+ ) {
2029
+ state = STATE_SELF_CLOSING_START_TAG;
2030
+ pos++;
2031
+ } else {
2032
+ state = STATE_SCRIPT_DATA;
2033
+ }
2034
+ } else if (cc === CC_GREATER_THAN) {
2035
+ // U+003E GREATER-THAN SIGN (>)
2036
+ // If the current end tag token is an appropriate end tag token, then switch
2037
+ // to the data state and emit the current tag token. Otherwise, treat it as
2038
+ // per the "anything else" entry below.
2039
+ tagNameEnd = pos;
2040
+ if (
2041
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
2042
+ lastOpenTagName
2043
+ ) {
2044
+ flushText(tagStart);
2045
+ state = STATE_DATA;
2046
+ pos = emitCloseTag(pos + 1);
2047
+ } else {
2048
+ state = STATE_SCRIPT_DATA;
2049
+ }
2050
+ } else if (isAsciiAlpha(cc)) {
2051
+ // ASCII upper alpha / ASCII lower alpha
2052
+ pos++;
2053
+ } else {
2054
+ // Anything else
2055
+ // Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
2056
+ // token, and a character token for each of the characters in the temporary
2057
+ // buffer (in the order they were added to the buffer). Reconsume in the
2058
+ // script data state.
2059
+ state = STATE_SCRIPT_DATA;
2060
+ // Reconsume
2061
+ }
2062
+ break;
2063
+
2064
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escape-start-state
2065
+ case STATE_SCRIPT_DATA_ESCAPE_START:
2066
+ // Consume the next input character:
2067
+ // U+002D HYPHEN-MINUS (-)
2068
+ // Switch to the script data escape start dash state. Emit a U+002D
2069
+ // HYPHEN-MINUS character token.
2070
+ if (cc === CC_HYPHEN_MINUS) {
2071
+ state = STATE_SCRIPT_DATA_ESCAPE_START_DASH;
2072
+ pos++;
2073
+ } else {
2074
+ // Anything else
2075
+ // Reconsume in the script data state.
2076
+ state = STATE_SCRIPT_DATA;
2077
+ // Reconsume
2078
+ }
2079
+ break;
2080
+
2081
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escape-start-dash-state
2082
+ case STATE_SCRIPT_DATA_ESCAPE_START_DASH:
2083
+ // Consume the next input character:
2084
+ // U+002D HYPHEN-MINUS (-)
2085
+ // Switch to the script data escaped dash dash state. Emit a U+002D
2086
+ // HYPHEN-MINUS character token.
2087
+ if (cc === CC_HYPHEN_MINUS) {
2088
+ state = STATE_SCRIPT_DATA_ESCAPED_DASH_DASH;
2089
+ pos++;
2090
+ } else {
2091
+ // Anything else
2092
+ // Reconsume in the script data state.
2093
+ state = STATE_SCRIPT_DATA;
2094
+ // Reconsume
2095
+ }
2096
+ break;
2097
+
2098
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-state
2099
+ case STATE_SCRIPT_DATA_ESCAPED:
2100
+ // Consume the next input character:
2101
+ // U+002D HYPHEN-MINUS (-)
2102
+ // Switch to the script data escaped dash state. Emit a U+002D HYPHEN-MINUS
2103
+ // character token.
2104
+ if (cc === CC_HYPHEN_MINUS) {
2105
+ state = STATE_SCRIPT_DATA_ESCAPED_DASH;
2106
+ pos++;
2107
+ } else if (cc === CC_LESS_THAN) {
2108
+ // U+003C LESS-THAN SIGN (<)
2109
+ // Switch to the script data escaped less-than sign state.
2110
+ state = STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
2111
+ pos++;
2112
+ } else {
2113
+ // Anything else
2114
+ // Emit the current input character as a character token.
2115
+ pos++;
2116
+ }
2117
+ break;
2118
+
2119
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-dash-state
2120
+ case STATE_SCRIPT_DATA_ESCAPED_DASH:
2121
+ // Consume the next input character:
2122
+ // U+002D HYPHEN-MINUS (-)
2123
+ // Switch to the script data escaped dash dash state. Emit a U+002D
2124
+ // HYPHEN-MINUS character token.
2125
+ if (cc === CC_HYPHEN_MINUS) {
2126
+ state = STATE_SCRIPT_DATA_ESCAPED_DASH_DASH;
2127
+ pos++;
2128
+ } else if (cc === CC_LESS_THAN) {
2129
+ // U+003C LESS-THAN SIGN (<)
2130
+ // Switch to the script data escaped less-than sign state.
2131
+ state = STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
2132
+ pos++;
2133
+ } else {
2134
+ // Anything else
2135
+ // Switch to the script data escaped state. Emit the current input character
2136
+ // as a character token.
2137
+ state = STATE_SCRIPT_DATA_ESCAPED;
2138
+ pos++;
2139
+ }
2140
+ break;
2141
+
2142
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-dash-dash-state
2143
+ case STATE_SCRIPT_DATA_ESCAPED_DASH_DASH:
2144
+ // Consume the next input character:
2145
+ // U+002D HYPHEN-MINUS (-)
2146
+ // Emit a U+002D HYPHEN-MINUS character token.
2147
+ if (cc === CC_HYPHEN_MINUS) {
2148
+ pos++;
2149
+ } else if (cc === CC_LESS_THAN) {
2150
+ // U+003C LESS-THAN SIGN (<)
2151
+ // Switch to the script data escaped less-than sign state.
2152
+ state = STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
2153
+ pos++;
2154
+ } else if (cc === CC_GREATER_THAN) {
2155
+ // U+003E GREATER-THAN SIGN (>)
2156
+ // Switch to the script data state. Emit a U+003E GREATER-THAN SIGN
2157
+ // character token.
2158
+ state = STATE_SCRIPT_DATA;
2159
+ pos++;
2160
+ } else {
2161
+ // Anything else
2162
+ // Switch to the script data escaped state. Emit the current input character
2163
+ // as a character token.
2164
+ state = STATE_SCRIPT_DATA_ESCAPED;
2165
+ pos++;
2166
+ }
2167
+ break;
2168
+
2169
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-less-than-sign-state
2170
+ case STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
2171
+ // Consume the next input character:
2172
+ // U+002F SOLIDUS (/)
2173
+ // Set the temporary buffer to the empty string. Switch to the script data
2174
+ // escaped end tag open state.
2175
+ if (cc === CC_SOLIDUS) {
2176
+ tempBuffer = "";
2177
+ state = STATE_SCRIPT_DATA_ESCAPED_END_TAG_OPEN;
2178
+ pos++;
2179
+ } else if (isAsciiAlpha(cc)) {
2180
+ // ASCII alpha
2181
+ // Set the temporary buffer to the empty string. Emit a U+003C LESS-THAN
2182
+ // SIGN character token. Reconsume in the script data double escape start
2183
+ // state.
2184
+ tempBuffer = "";
2185
+ state = STATE_SCRIPT_DATA_DOUBLE_ESCAPE_START;
2186
+ // Reconsume
2187
+ } else {
2188
+ // Anything else
2189
+ // Emit a U+003C LESS-THAN SIGN character token. Reconsume in the script
2190
+ // data escaped state.
2191
+ state = STATE_SCRIPT_DATA_ESCAPED;
2192
+ // Reconsume
2193
+ }
2194
+ break;
2195
+
2196
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-end-tag-open-state
2197
+ case STATE_SCRIPT_DATA_ESCAPED_END_TAG_OPEN:
2198
+ // Consume the next input character:
2199
+ // ASCII alpha
2200
+ // Create a new end tag token, set its tag name to the empty string.
2201
+ // Reconsume in the script data escaped end tag name state.
2202
+ if (isAsciiAlpha(cc)) {
2203
+ tagNameStart = pos;
2204
+ state = STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME;
2205
+ // Reconsume
2206
+ } else {
2207
+ // Anything else
2208
+ // Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS
2209
+ // character token. Reconsume in the script data escaped state.
2210
+ state = STATE_SCRIPT_DATA_ESCAPED;
2211
+ // Reconsume
2212
+ }
2213
+ break;
2214
+
2215
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-end-tag-name-state
2216
+ case STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME:
2217
+ // Consume the next input character:
2218
+ // U+0009 CHARACTER TABULATION (tab)
2219
+ // U+000A LINE FEED (LF)
2220
+ // U+000C FORM FEED (FF)
2221
+ // U+0020 SPACE
2222
+ // If the current end tag token is an appropriate end tag token, then switch
2223
+ // to the before attribute name state. Otherwise, treat it as per the
2224
+ // "anything else" entry below.
2225
+ if (isSpace(cc)) {
2226
+ tagNameEnd = pos;
2227
+ if (
2228
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
2229
+ lastOpenTagName
2230
+ ) {
2231
+ state = STATE_BEFORE_ATTRIBUTE_NAME;
2232
+ pos++;
2233
+ } else {
2234
+ state = STATE_SCRIPT_DATA_ESCAPED;
2235
+ }
2236
+ } else if (cc === CC_SOLIDUS) {
2237
+ // U+002F SOLIDUS (/)
2238
+ // If the current end tag token is an appropriate end tag token, then switch
2239
+ // to the self-closing start tag state. Otherwise, treat it as per the
2240
+ // "anything else" entry below.
2241
+ tagNameEnd = pos;
2242
+ if (
2243
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
2244
+ lastOpenTagName
2245
+ ) {
2246
+ state = STATE_SELF_CLOSING_START_TAG;
2247
+ pos++;
2248
+ } else {
2249
+ state = STATE_SCRIPT_DATA_ESCAPED;
2250
+ }
2251
+ } else if (cc === CC_GREATER_THAN) {
2252
+ // U+003E GREATER-THAN SIGN (>)
2253
+ // If the current end tag token is an appropriate end tag token, then switch
2254
+ // to the data state and emit the current tag token. Otherwise, treat it as
2255
+ // per the "anything else" entry below.
2256
+ tagNameEnd = pos;
2257
+ if (
2258
+ input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
2259
+ lastOpenTagName
2260
+ ) {
2261
+ flushText(tagStart);
2262
+ state = STATE_DATA;
2263
+ pos = emitCloseTag(pos + 1);
2264
+ } else {
2265
+ state = STATE_SCRIPT_DATA_ESCAPED;
2266
+ }
2267
+ } else if (isAsciiAlpha(cc)) {
2268
+ // ASCII upper alpha / ASCII lower alpha
2269
+ pos++;
2270
+ } else {
2271
+ // Anything else
2272
+ // Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
2273
+ // token, and a character token for each of the characters in the temporary
2274
+ // buffer (in the order they were added to the buffer). Reconsume in the
2275
+ // script data escaped state.
2276
+ state = STATE_SCRIPT_DATA_ESCAPED;
2277
+ // Reconsume
2278
+ }
2279
+ break;
2280
+
2281
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-start-state
2282
+ case STATE_SCRIPT_DATA_DOUBLE_ESCAPE_START:
2283
+ // Consume the next input character:
2284
+ // U+0009 CHARACTER TABULATION (tab)
2285
+ // U+000A LINE FEED (LF)
2286
+ // U+000C FORM FEED (FF)
2287
+ // U+0020 SPACE
2288
+ // U+002F SOLIDUS (/)
2289
+ // U+003E GREATER-THAN SIGN (>)
2290
+ // If the temporary buffer is the string "script", then switch to the script
2291
+ // data double escaped state. Otherwise, switch to the script data escaped
2292
+ // state. Emit the current input character as a character token.
2293
+ if (isSpace(cc) || cc === CC_SOLIDUS || cc === CC_GREATER_THAN) {
2294
+ state =
2295
+ tempBuffer === "script"
2296
+ ? STATE_SCRIPT_DATA_DOUBLE_ESCAPED
2297
+ : STATE_SCRIPT_DATA_ESCAPED;
2298
+ pos++;
2299
+ } else if (isAsciiUpperAlpha(cc)) {
2300
+ // ASCII upper alpha
2301
+ // Append the lowercase version of the current input character (add 0x0020
2302
+ // to the character's code point) to the temporary buffer. Emit the current
2303
+ // input character as a character token.
2304
+ tempBuffer += String.fromCharCode(cc + 0x20);
2305
+ pos++;
2306
+ } else if (isAsciiLowerAlpha(cc)) {
2307
+ // ASCII lower alpha
2308
+ // Append the current input character to the temporary buffer. Emit the
2309
+ // current input character as a character token.
2310
+ tempBuffer += String.fromCharCode(cc);
2311
+ pos++;
2312
+ } else {
2313
+ // Anything else
2314
+ // Reconsume in the script data escaped state.
2315
+ state = STATE_SCRIPT_DATA_ESCAPED;
2316
+ // Reconsume
2317
+ }
2318
+ break;
2319
+
2320
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-state
2321
+ case STATE_SCRIPT_DATA_DOUBLE_ESCAPED:
2322
+ // Consume the next input character:
2323
+ // U+002D HYPHEN-MINUS (-)
2324
+ // Switch to the script data double escaped dash state. Emit a U+002D
2325
+ // HYPHEN-MINUS character token.
2326
+ if (cc === CC_HYPHEN_MINUS) {
2327
+ state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH;
2328
+ pos++;
2329
+ } else if (cc === CC_LESS_THAN) {
2330
+ // U+003C LESS-THAN SIGN (<)
2331
+ // Switch to the script data double escaped less-than sign state. Emit a
2332
+ // U+003C LESS-THAN SIGN character token.
2333
+ state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
2334
+ pos++;
2335
+ } else {
2336
+ // Anything else
2337
+ // Emit the current input character as a character token.
2338
+ pos++;
2339
+ }
2340
+ break;
2341
+
2342
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-dash-state
2343
+ case STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
2344
+ // Consume the next input character:
2345
+ // U+002D HYPHEN-MINUS (-)
2346
+ // Switch to the script data double escaped dash dash state. Emit a U+002D
2347
+ // HYPHEN-MINUS character token.
2348
+ if (cc === CC_HYPHEN_MINUS) {
2349
+ state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH;
2350
+ pos++;
2351
+ } else if (cc === CC_LESS_THAN) {
2352
+ // U+003C LESS-THAN SIGN (<)
2353
+ // Switch to the script data double escaped less-than sign state. Emit a
2354
+ // U+003C LESS-THAN SIGN character token.
2355
+ state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
2356
+ pos++;
2357
+ } else {
2358
+ // Anything else
2359
+ // Switch to the script data double escaped state. Emit the current input
2360
+ // character as a character token.
2361
+ state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED;
2362
+ pos++;
2363
+ }
2364
+ break;
2365
+
2366
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-dash-dash-state
2367
+ case STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
2368
+ // Consume the next input character:
2369
+ // U+002D HYPHEN-MINUS (-)
2370
+ // Emit a U+002D HYPHEN-MINUS character token.
2371
+ if (cc === CC_HYPHEN_MINUS) {
2372
+ pos++;
2373
+ } else if (cc === CC_LESS_THAN) {
2374
+ // U+003C LESS-THAN SIGN (<)
2375
+ // Switch to the script data double escaped less-than sign state. Emit a
2376
+ // U+003C LESS-THAN SIGN character token.
2377
+ state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
2378
+ pos++;
2379
+ } else if (cc === CC_GREATER_THAN) {
2380
+ // U+003E GREATER-THAN SIGN (>)
2381
+ // Switch to the script data state. Emit a U+003E GREATER-THAN SIGN
2382
+ // character token.
2383
+ state = STATE_SCRIPT_DATA;
2384
+ pos++;
2385
+ } else {
2386
+ // Anything else
2387
+ // Switch to the script data double escaped state. Emit the current input
2388
+ // character as a character token.
2389
+ state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED;
2390
+ pos++;
2391
+ }
2392
+ break;
2393
+
2394
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-less-than-sign-state
2395
+ case STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
2396
+ // Consume the next input character:
2397
+ // U+002F SOLIDUS (/)
2398
+ // Set the temporary buffer to the empty string. Switch to the script data
2399
+ // double escape end state. Emit a U+002F SOLIDUS character token.
2400
+ if (cc === CC_SOLIDUS) {
2401
+ tempBuffer = "";
2402
+ state = STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END;
2403
+ pos++;
2404
+ } else {
2405
+ // Anything else
2406
+ // Reconsume in the script data double escaped state.
2407
+ state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED;
2408
+ // Reconsume
2409
+ }
2410
+ break;
2411
+
2412
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-end-state
2413
+ case STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END:
2414
+ // Consume the next input character:
2415
+ // U+0009 CHARACTER TABULATION (tab)
2416
+ // U+000A LINE FEED (LF)
2417
+ // U+000C FORM FEED (FF)
2418
+ // U+0020 SPACE
2419
+ // U+002F SOLIDUS (/)
2420
+ // U+003E GREATER-THAN SIGN (>)
2421
+ // If the temporary buffer is the string "script", then switch to the script
2422
+ // data escaped state. Otherwise, switch to the script data double escaped
2423
+ // state. Emit the current input character as a character token.
2424
+ if (isSpace(cc) || cc === CC_SOLIDUS || cc === CC_GREATER_THAN) {
2425
+ state =
2426
+ tempBuffer === "script"
2427
+ ? STATE_SCRIPT_DATA_ESCAPED
2428
+ : STATE_SCRIPT_DATA_DOUBLE_ESCAPED;
2429
+ pos++;
2430
+ } else if (isAsciiUpperAlpha(cc)) {
2431
+ // ASCII upper alpha
2432
+ // Append the lowercase version of the current input character (add 0x0020
2433
+ // to the character's code point) to the temporary buffer. Emit the current
2434
+ // input character as a character token.
2435
+ if (tempBuffer.length < 6) {
2436
+ tempBuffer += String.fromCharCode(cc + 0x20);
2437
+ }
2438
+ pos++;
2439
+ } else if (isAsciiLowerAlpha(cc)) {
2440
+ // ASCII lower alpha
2441
+ // Append the current input character to the temporary buffer. Emit the
2442
+ // current input character as a character token.
2443
+ if (tempBuffer.length < 6) {
2444
+ tempBuffer += String.fromCharCode(cc);
2445
+ }
2446
+ pos++;
2447
+ } else {
2448
+ // Anything else
2449
+ // Reconsume in the script data double escaped state.
2450
+ state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED;
2451
+ // Reconsume
2452
+ }
2453
+ break;
2454
+
2455
+ // https://html.spec.whatwg.org/multipage/parsing.html#plaintext-state
2456
+ case STATE_PLAINTEXT:
2457
+ // Consume the next input character:
2458
+ // Anything else
2459
+ // Emit the current input character as a character token.
2460
+ pos++;
2461
+ break;
2462
+
2463
+ // https://html.spec.whatwg.org/multipage/parsing.html#character-reference-state
2464
+ case STATE_CHARACTER_REFERENCE:
2465
+ // Set the temporary buffer to the empty string. Append a U+0026
2466
+ // AMPERSAND (&) character to the temporary buffer.
2467
+ // Consume the next input character:
2468
+ if (isAsciiAlphanumeric(cc)) {
2469
+ // ASCII alphanumeric
2470
+ // Reconsume in the named character reference state.
2471
+ state = STATE_NAMED_CHARACTER_REFERENCE;
2472
+ // Reconsume
2473
+ } else if (cc === CC_NUMBER_SIGN) {
2474
+ // U+0023 NUMBER SIGN (#)
2475
+ // Append the current input character to the temporary buffer.
2476
+ // Switch to the numeric character reference state.
2477
+ state = STATE_NUMERIC_CHARACTER_REFERENCE;
2478
+ pos++;
2479
+ } else {
2480
+ // Anything else
2481
+ // Flush code points consumed as a character reference.
2482
+ // Reconsume in the return state.
2483
+ state = returnState;
2484
+ // Reconsume
2485
+ }
2486
+ break;
2487
+
2488
+ // https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
2489
+ case STATE_NAMED_CHARACTER_REFERENCE:
2490
+ // Consume the maximum number of characters possible, where the
2491
+ // consumed characters are one of the identifiers in the first
2492
+ // column of the named character references table. Append each
2493
+ // character to the temporary buffer when it's consumed.
2494
+ //
2495
+ // TODO(named-entities): The WHATWG spec requires matching against
2496
+ // the full named character references table (~2,000 entries).
2497
+ // For now, we scan past the entity name without table lookup to keep
2498
+ // the core tokenizer lightweight. A follow-up PR will provide a build-time
2499
+ // script to generate a compact Trie/Map or dynamic import for the full
2500
+ // table, along with the `is_consumed_as_part_of_an_attribute` check.
2501
+ // The semantic fallback for consumers (like HtmlParser) is to use the
2502
+ // minimal `decodeHtmlEntities` utility exported below, which guarantees
2503
+ // correctness for URLs (`&amp;`) and common characters without bundle bloat.
2504
+ namedEntityConsumed = 0;
2505
+ while (
2506
+ pos + namedEntityConsumed < len &&
2507
+ isAsciiAlphanumeric(input.charCodeAt(pos + namedEntityConsumed))
2508
+ ) {
2509
+ namedEntityConsumed++;
2510
+ // Safety cap — the longest entity is ~33 chars
2511
+ if (namedEntityConsumed > 33) break;
2512
+ }
2513
+ // Check for trailing semicolon
2514
+ if (
2515
+ pos + namedEntityConsumed < len &&
2516
+ input.charCodeAt(pos + namedEntityConsumed) === CC_SEMICOLON
2517
+ ) {
2518
+ namedEntityConsumed++;
2519
+ }
2520
+ if (namedEntityConsumed > 0) {
2521
+ pos += namedEntityConsumed;
2522
+ state = returnState;
2523
+ } else {
2524
+ // No match — flush code points consumed as a character
2525
+ // reference. Switch to the ambiguous ampersand state.
2526
+ state = STATE_AMBIGUOUS_AMPERSAND;
2527
+ }
2528
+ break;
2529
+
2530
+ // https://html.spec.whatwg.org/multipage/parsing.html#ambiguous-ampersand-state
2531
+ case STATE_AMBIGUOUS_AMPERSAND:
2532
+ // Consume the next input character:
2533
+ if (isAsciiAlphanumeric(cc)) {
2534
+ // ASCII alphanumeric
2535
+ // If the character reference was consumed as part of an
2536
+ // attribute, then append the current input character to
2537
+ // the current attribute's value. Otherwise, emit the
2538
+ // current input character as a character token.
2539
+ pos++;
2540
+ } else if (cc === CC_SEMICOLON) {
2541
+ // U+003B SEMICOLON (;)
2542
+ // This is an unknown-named-character-reference parse error.
2543
+ // Reconsume in the return state.
2544
+ state = returnState;
2545
+ // Reconsume
2546
+ } else {
2547
+ // Anything else
2548
+ // Reconsume in the return state.
2549
+ state = returnState;
2550
+ // Reconsume
2551
+ }
2552
+ break;
2553
+
2554
+ // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-state
2555
+ case STATE_NUMERIC_CHARACTER_REFERENCE:
2556
+ // Set the character reference code to zero (0).
2557
+ // Consume the next input character:
2558
+ if (cc === 0x78 || cc === 0x58) {
2559
+ // U+0078 LATIN SMALL LETTER X
2560
+ // U+0058 LATIN CAPITAL LETTER X
2561
+ // Append the current input character to the temporary
2562
+ // buffer. Switch to the hexadecimal character reference
2563
+ // start state.
2564
+ state = STATE_HEXADECIMAL_CHARACTER_REFERENCE_START;
2565
+ pos++;
2566
+ } else {
2567
+ // Anything else
2568
+ // Reconsume in the decimal character reference start state.
2569
+ state = STATE_DECIMAL_CHARACTER_REFERENCE_START;
2570
+ // Reconsume
2571
+ }
2572
+ break;
2573
+
2574
+ // https://html.spec.whatwg.org/multipage/parsing.html#hexadecimal-character-reference-start-state
2575
+ case STATE_HEXADECIMAL_CHARACTER_REFERENCE_START:
2576
+ // Consume the next input character:
2577
+ // ASCII hex digit: reconsume in the hexadecimal character reference state.
2578
+ // Anything else: absence-of-digits-in-numeric-character-reference parse
2579
+ // error. Flush code points consumed as a character reference. Reconsume
2580
+ // in the return state.
2581
+ state = isAsciiHexDigit(cc)
2582
+ ? STATE_HEXADECIMAL_CHARACTER_REFERENCE
2583
+ : returnState;
2584
+ // Reconsume
2585
+ break;
2586
+
2587
+ // https://html.spec.whatwg.org/multipage/parsing.html#decimal-character-reference-start-state
2588
+ case STATE_DECIMAL_CHARACTER_REFERENCE_START:
2589
+ // Consume the next input character:
2590
+ // ASCII digit: reconsume in the decimal character reference state.
2591
+ // Anything else: absence-of-digits-in-numeric-character-reference parse
2592
+ // error. Flush code points consumed as a character reference. Reconsume
2593
+ // in the return state.
2594
+ state = isAsciiDigit(cc)
2595
+ ? STATE_DECIMAL_CHARACTER_REFERENCE
2596
+ : returnState;
2597
+ // Reconsume
2598
+ break;
2599
+
2600
+ // https://html.spec.whatwg.org/multipage/parsing.html#hexadecimal-character-reference-state
2601
+ case STATE_HEXADECIMAL_CHARACTER_REFERENCE:
2602
+ // Consume the next input character:
2603
+ if (isAsciiHexDigit(cc)) {
2604
+ // ASCII digit / upper hex / lower hex
2605
+ // Multiply the character reference code by 16. Add a numeric
2606
+ // version of the current input character to the character
2607
+ // reference code.
2608
+ pos++;
2609
+ } else if (cc === CC_SEMICOLON) {
2610
+ // U+003B SEMICOLON
2611
+ // Switch to the numeric character reference end state.
2612
+ state = STATE_NUMERIC_CHARACTER_REFERENCE_END;
2613
+ pos++;
2614
+ } else {
2615
+ // Anything else
2616
+ // This is a missing-semicolon-after-character-reference
2617
+ // parse error. Reconsume in the numeric character reference
2618
+ // end state.
2619
+ state = STATE_NUMERIC_CHARACTER_REFERENCE_END;
2620
+ // Reconsume
2621
+ }
2622
+ break;
2623
+
2624
+ // https://html.spec.whatwg.org/multipage/parsing.html#decimal-character-reference-state
2625
+ case STATE_DECIMAL_CHARACTER_REFERENCE:
2626
+ // Consume the next input character:
2627
+ if (isAsciiDigit(cc)) {
2628
+ // ASCII digit
2629
+ // Multiply the character reference code by 10. Add a numeric
2630
+ // version of the current input character (subtract 0x0030
2631
+ // from the character's code point) to the character reference
2632
+ // code.
2633
+ pos++;
2634
+ } else if (cc === CC_SEMICOLON) {
2635
+ // U+003B SEMICOLON
2636
+ // Switch to the numeric character reference end state.
2637
+ state = STATE_NUMERIC_CHARACTER_REFERENCE_END;
2638
+ pos++;
2639
+ } else {
2640
+ // Anything else
2641
+ // This is a missing-semicolon-after-character-reference
2642
+ // parse error. Reconsume in the numeric character reference
2643
+ // end state.
2644
+ state = STATE_NUMERIC_CHARACTER_REFERENCE_END;
2645
+ // Reconsume
2646
+ }
2647
+ break;
2648
+
2649
+ // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
2650
+ case STATE_NUMERIC_CHARACTER_REFERENCE_END:
2651
+ // Check the character reference code (validation omitted for
2652
+ // the scanner — we don't decode, just skip past the entity).
2653
+ // Flush code points consumed as a character reference.
2654
+ // Switch to the return state.
2655
+ state = returnState;
2656
+ // Reconsume
2657
+ break;
2658
+
2659
+ default:
2660
+ pos++;
2661
+ }
2662
+ }
2663
+
2664
+ if (
2665
+ (state >= STATE_MARKUP_DECLARATION_OPEN && state <= STATE_BOGUS_COMMENT) ||
2666
+ (state >= STATE_COMMENT_LESS_THAN_SIGN &&
2667
+ state <= STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH) ||
2668
+ (state >= STATE_CDATA_SECTION && state <= STATE_CDATA_SECTION_END)
2669
+ ) {
2670
+ if (callbacks.comment !== undefined) {
2671
+ pos = callbacks.comment(input, commentStart, len);
2672
+ }
2673
+ } else if (state >= STATE_DOCTYPE && state <= STATE_BOGUS_DOCTYPE) {
2674
+ if (callbacks.doctype !== undefined) {
2675
+ pos = callbacks.doctype(input, commentStart, len);
2676
+ }
2677
+ } else if (textStart < len && callbacks.text !== undefined) {
2678
+ callbacks.text(input, textStart, len);
2679
+ }
2680
+
2681
+ return pos;
2682
+ };
2683
+
2684
// Attach the QUOTE_* constants to `walkHtmlTokens` as own properties, in the
// same order as before (NONE, SINGLE, DOUBLE).
Object.assign(walkHtmlTokens, { QUOTE_NONE, QUOTE_SINGLE, QUOTE_DOUBLE });
2687
+
2688
/**
 * Named character references understood by `decodeHtmlEntities`.
 *
 * Keys are matched case-sensitively. The upper-case spellings `AMP`, `LT`,
 * `GT` and `QUOT` are listed explicitly because they are real entries of the
 * WHATWG table; mixed-case spellings such as `Lt` / `Gt` name *different*
 * characters there and must therefore not be folded onto these entries.
 * @type {Record<string, string>}
 */
const MINIMAL_ENTITIES = {
	amp: "&",
	AMP: "&",
	lt: "<",
	LT: "<",
	gt: ">",
	GT: ">",
	quot: '"',
	QUOT: '"',
	apos: "'",
	nbsp: "\u00A0"
};

/**
 * One character reference candidate, with an optional trailing semicolon:
 *   group 1 - hex digits of `&#x…` / `&#X…`
 *   group 2 - decimal digits of `&#…`
 *   group 3 - alphanumeric name of `&name`
 * Numeric groups only capture real digits, so text following the digits is
 * never part of the match and cannot be dropped by the replacement.
 */
const MINIMAL_ENTITY_REFERENCE =
	/&(?:#(?:[xX]([0-9a-fA-F]+)|([0-9]+))|([0-9a-zA-Z]+));?/g;

/**
 * Converts a numeric character reference code to its string value.
 * Null, surrogate and out-of-range codes become U+FFFD, as in the WHATWG
 * "numeric character reference end state". The C1-control remapping table
 * (0x80-0x9F) from that state is intentionally not applied here.
 * @param {number} code parsed character reference code
 * @returns {string} the decoded character, or U+FFFD
 */
const numericReferenceToString = (code) => {
	if (code === 0 || code > 0x10ffff || (code >= 0xd800 && code <= 0xdfff)) {
		return "\uFFFD";
	}
	return String.fromCodePoint(code);
};

/**
 * Minimal entity decoder for safe string resolution without bundle bloat.
 * Decodes the names listed in `MINIMAL_ENTITIES` (case-sensitively) and all
 * decimal / hexadecimal numeric references. Anything else - unknown names,
 * `&#;`, `&#x;` - is left in the output exactly as written, and characters
 * that merely follow a numeric reference are preserved.
 * @param {string} str the raw string from the token slice
 * @returns {string} decoded string
 */
walkHtmlTokens.decodeHtmlEntities = (str) => {
	// Fast path: nothing that could be a reference.
	if (!str.includes("&")) return str;

	return str.replace(
		MINIMAL_ENTITY_REFERENCE,
		(match, hexDigits, decimalDigits, name) => {
			// Hexadecimal numeric reference: &#x41;
			if (hexDigits !== undefined) {
				return numericReferenceToString(Number.parseInt(hexDigits, 16));
			}

			// Decimal numeric reference: &#65;
			if (decimalDigits !== undefined) {
				return numericReferenceToString(Number.parseInt(decimalDigits, 10));
			}

			// Known minimal named reference: &amp;
			if (Object.prototype.hasOwnProperty.call(MINIMAL_ENTITIES, name)) {
				return MINIMAL_ENTITIES[name];
			}

			// Unknown named entity: preserve as literal to avoid data loss
			return match;
		}
	);
};
2732
+
2733
+ module.exports = walkHtmlTokens;