webpack 5.106.2 → 5.107.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/lib/APIPlugin.js +1 -1
- package/lib/BannerPlugin.js +3 -4
- package/lib/Cache.js +3 -6
- package/lib/Chunk.js +21 -25
- package/lib/ChunkGroup.js +57 -15
- package/lib/CompatibilityPlugin.js +8 -7
- package/lib/Compilation.js +67 -37
- package/lib/Compiler.js +4 -13
- package/lib/ContextModule.js +2 -2
- package/lib/DefinePlugin.js +2 -2
- package/lib/Dependency.js +22 -1
- package/lib/DependencyTemplate.js +2 -1
- package/lib/EnvironmentPlugin.js +1 -1
- package/lib/EvalSourceMapDevToolPlugin.js +8 -10
- package/lib/ExportsInfo.js +30 -34
- package/lib/ExternalModule.js +91 -26
- package/lib/ExternalModuleFactoryPlugin.js +7 -1
- package/lib/FileSystemInfo.js +187 -72
- package/lib/Generator.js +3 -3
- package/lib/HotModuleReplacementPlugin.js +26 -8
- package/lib/IgnorePlugin.js +2 -1
- package/lib/Module.js +20 -19
- package/lib/ModuleFactory.js +1 -1
- package/lib/ModuleNotFoundError.js +3 -84
- package/lib/ModuleSourceTypeConstants.js +51 -19
- package/lib/ModuleTypeConstants.js +12 -3
- package/lib/MultiCompiler.js +2 -2
- package/lib/NodeStuffPlugin.js +1 -1
- package/lib/NormalModule.js +119 -77
- package/lib/NormalModuleFactory.js +47 -27
- package/lib/Parser.js +1 -1
- package/lib/ProgressPlugin.js +129 -56
- package/lib/RuntimeGlobals.js +5 -5
- package/lib/RuntimeModule.js +9 -7
- package/lib/RuntimePlugin.js +12 -1
- package/lib/SourceMapDevToolPlugin.js +250 -49
- package/lib/Template.js +1 -1
- package/lib/TemplatedPathPlugin.js +22 -4
- package/lib/WarnCaseSensitiveModulesPlugin.js +70 -2
- package/lib/WarnDeprecatedOptionPlugin.js +1 -1
- package/lib/WarnNoModeSetPlugin.js +16 -1
- package/lib/Watching.js +2 -3
- package/lib/WebpackError.js +3 -77
- package/lib/WebpackIsIncludedPlugin.js +1 -1
- package/lib/WebpackOptionsApply.js +13 -1
- package/lib/asset/AssetBytesGenerator.js +12 -8
- package/lib/asset/AssetGenerator.js +36 -22
- package/lib/asset/AssetModulesPlugin.js +6 -8
- package/lib/asset/AssetSourceGenerator.js +12 -8
- package/lib/buildChunkGraph.js +4 -6
- package/lib/cache/PackFileCacheStrategy.js +4 -4
- package/lib/cli.js +3 -1
- package/lib/config/defaults.js +197 -10
- package/lib/config/normalization.js +3 -1
- package/lib/css/CssGenerator.js +320 -105
- package/lib/css/CssInjectStyleRuntimeModule.js +44 -42
- package/lib/css/CssLoadingRuntimeModule.js +22 -4
- package/lib/{CssModule.js → css/CssModule.js} +15 -15
- package/lib/css/CssModulesPlugin.js +168 -88
- package/lib/css/CssParser.js +566 -269
- package/lib/css/walkCssTokens.js +148 -2
- package/lib/dependencies/AMDRequireDependenciesBlockParserPlugin.js +1 -1
- package/lib/dependencies/CommonJsDependencyHelpers.js +63 -0
- package/lib/dependencies/CommonJsExportRequireDependency.js +54 -10
- package/lib/dependencies/CommonJsExportsParserPlugin.js +1 -1
- package/lib/dependencies/CommonJsFullRequireDependency.js +32 -9
- package/lib/dependencies/CommonJsImportsParserPlugin.js +112 -4
- package/lib/dependencies/CommonJsRequireDependency.js +67 -4
- package/lib/dependencies/ContextDependency.js +1 -1
- package/lib/dependencies/ContextDependencyHelpers.js +1 -1
- package/lib/dependencies/CreateRequireParserPlugin.js +1 -1
- package/lib/dependencies/CriticalDependencyWarning.js +1 -1
- package/lib/dependencies/CssIcssExportDependency.js +332 -67
- package/lib/dependencies/CssIcssImportDependency.js +49 -7
- package/lib/dependencies/CssIcssSymbolDependency.js +11 -3
- package/lib/dependencies/CssImportDependency.js +8 -0
- package/lib/dependencies/CssUrlDependency.js +28 -2
- package/lib/dependencies/HarmonyDetectionParserPlugin.js +22 -2
- package/lib/dependencies/HarmonyExportDependencyParserPlugin.js +8 -7
- package/lib/dependencies/HarmonyExportExpressionDependency.js +22 -14
- package/lib/dependencies/HarmonyExportImportedSpecifierDependency.js +110 -3
- package/lib/dependencies/HarmonyImportDependency.js +10 -2
- package/lib/dependencies/HarmonyImportDependencyParserPlugin.js +22 -1
- package/lib/dependencies/HarmonyImportSpecifierDependency.js +1 -1
- package/lib/{HarmonyLinkingError.js → dependencies/HarmonyLinkingError.js} +5 -3
- package/lib/dependencies/HtmlInlineScriptDependency.js +133 -0
- package/lib/dependencies/HtmlInlineStyleDependency.js +101 -0
- package/lib/dependencies/HtmlScriptSrcDependency.js +557 -0
- package/lib/dependencies/HtmlSourceDependency.js +128 -0
- package/lib/dependencies/ImportMetaContextDependencyParserPlugin.js +1 -1
- package/lib/dependencies/ImportParserPlugin.js +2 -2
- package/lib/dependencies/ImportPhase.js +1 -1
- package/lib/dependencies/RequireIncludeDependencyParserPlugin.js +1 -1
- package/lib/{RequireJsStuffPlugin.js → dependencies/RequireJsStuffPlugin.js} +7 -7
- package/lib/dependencies/SystemPlugin.js +1 -1
- package/lib/dependencies/WebAssemblyImportDependency.js +1 -1
- package/lib/dependencies/WorkerPlugin.js +2 -2
- package/lib/{DelegatedModule.js → dll/DelegatedModule.js} +31 -31
- package/lib/{DelegatedModuleFactoryPlugin.js → dll/DelegatedModuleFactoryPlugin.js} +4 -4
- package/lib/{DelegatedPlugin.js → dll/DelegatedPlugin.js} +2 -2
- package/lib/{DllEntryPlugin.js → dll/DllEntryPlugin.js} +4 -4
- package/lib/{DllModule.js → dll/DllModule.js} +24 -24
- package/lib/{DllModuleFactory.js → dll/DllModuleFactory.js} +4 -4
- package/lib/{DllPlugin.js → dll/DllPlugin.js} +6 -5
- package/lib/{DllReferencePlugin.js → dll/DllReferencePlugin.js} +14 -14
- package/lib/{LibManifestPlugin.js → dll/LibManifestPlugin.js} +9 -9
- package/lib/{AsyncDependencyToInitialChunkError.js → errors/AsyncDependencyToInitialChunkError.js} +2 -2
- package/lib/errors/BuildCycleError.js +1 -1
- package/lib/{ChunkRenderError.js → errors/ChunkRenderError.js} +1 -1
- package/lib/{CodeGenerationError.js → errors/CodeGenerationError.js} +1 -1
- package/lib/{CommentCompilationWarning.js → errors/CommentCompilationWarning.js} +3 -3
- package/lib/{ConcurrentCompilationError.js → errors/ConcurrentCompilationError.js} +4 -2
- package/lib/{EnvironmentNotSupportAsyncWarning.js → errors/EnvironmentNotSupportAsyncWarning.js} +4 -4
- package/lib/{HookWebpackError.js → errors/HookWebpackError.js} +5 -5
- package/lib/{IgnoreErrorModuleFactory.js → errors/IgnoreErrorModuleFactory.js} +4 -4
- package/lib/{InvalidDependenciesModuleWarning.js → errors/InvalidDependenciesModuleWarning.js} +3 -3
- package/lib/errors/JSONParseError.js +114 -0
- package/lib/{ModuleBuildError.js → errors/ModuleBuildError.js} +5 -5
- package/lib/{ModuleDependencyError.js → errors/ModuleDependencyError.js} +2 -2
- package/lib/{ModuleDependencyWarning.js → errors/ModuleDependencyWarning.js} +4 -4
- package/lib/{ModuleError.js → errors/ModuleError.js} +5 -5
- package/lib/{ModuleHashingError.js → errors/ModuleHashingError.js} +1 -1
- package/lib/errors/ModuleNotFoundError.js +91 -0
- package/lib/{ModuleParseError.js → errors/ModuleParseError.js} +8 -6
- package/lib/{ModuleRestoreError.js → errors/ModuleRestoreError.js} +1 -1
- package/lib/{ModuleStoreError.js → errors/ModuleStoreError.js} +1 -1
- package/lib/{ModuleWarning.js → errors/ModuleWarning.js} +5 -5
- package/lib/{NodeStuffInWebError.js → errors/NodeStuffInWebError.js} +4 -4
- package/lib/errors/NonErrorEmittedError.js +28 -0
- package/lib/{UnhandledSchemeError.js → errors/UnhandledSchemeError.js} +2 -2
- package/lib/{UnsupportedFeatureWarning.js → errors/UnsupportedFeatureWarning.js} +3 -3
- package/lib/errors/WebpackError.js +84 -0
- package/lib/html/HtmlGenerator.js +379 -0
- package/lib/html/HtmlModulesPlugin.js +429 -0
- package/lib/html/HtmlParser.js +1489 -0
- package/lib/html/walkHtmlTokens.js +3249 -0
- package/lib/ids/IdHelpers.js +2 -1
- package/lib/index.js +36 -15
- package/lib/javascript/JavascriptModulesPlugin.js +91 -10
- package/lib/javascript/JavascriptParser.js +197 -16
- package/lib/javascript/JavascriptParserHelpers.js +1 -1
- package/lib/json/JsonParser.js +7 -16
- package/lib/library/AbstractLibraryPlugin.js +1 -1
- package/lib/library/EnableLibraryPlugin.js +1 -1
- package/lib/{FalseIIFEUmdWarning.js → library/FalseIIFEUmdWarning.js} +1 -1
- package/lib/library/ModuleLibraryPlugin.js +74 -0
- package/lib/node/NodeEnvironmentPlugin.js +4 -2
- package/lib/node/nodeConsole.js +113 -64
- package/lib/optimize/ConcatenatedModule.js +51 -6
- package/lib/optimize/InnerGraph.js +1 -1
- package/lib/optimize/InnerGraphPlugin.js +11 -1
- package/lib/optimize/MinMaxSizeWarning.js +4 -4
- package/lib/optimize/ModuleConcatenationPlugin.js +15 -7
- package/lib/optimize/RealContentHashPlugin.js +89 -26
- package/lib/optimize/SideEffectsFlagPlugin.js +112 -5
- package/lib/optimize/SplitChunksPlugin.js +5 -5
- package/lib/performance/AssetsOverSizeLimitWarning.js +2 -2
- package/lib/performance/EntrypointsOverSizeLimitWarning.js +2 -2
- package/lib/performance/NoAsyncChunksWarning.js +5 -3
- package/lib/performance/SizeLimitsPlugin.js +1 -1
- package/lib/prefetch/ChunkPrefetchTriggerRuntimeModule.js +4 -1
- package/lib/rules/UseEffectRulePlugin.js +4 -3
- package/lib/runtime/AutoPublicPathRuntimeModule.js +3 -3
- package/lib/runtime/GetChunkFilenameRuntimeModule.js +5 -5
- package/lib/runtime/MakeDeferredNamespaceObjectRuntime.js +119 -13
- package/lib/runtime/SetAnonymousDefaultNameRuntimeModule.js +35 -0
- package/lib/schemes/DataUriPlugin.js +13 -1
- package/lib/schemes/VirtualUrlPlugin.js +1 -1
- package/lib/serialization/SerializerMiddleware.js +2 -2
- package/lib/sharing/ConsumeSharedPlugin.js +4 -10
- package/lib/sharing/ConsumeSharedRuntimeModule.js +8 -4
- package/lib/sharing/ProvideSharedModule.js +1 -1
- package/lib/sharing/ProvideSharedPlugin.js +5 -5
- package/lib/sharing/resolveMatchedConfigs.js +1 -1
- package/lib/stats/DefaultStatsFactoryPlugin.js +2 -2
- package/lib/stats/DefaultStatsPresetPlugin.js +1 -1
- package/lib/stats/DefaultStatsPrinterPlugin.js +1 -1
- package/lib/stats/StatsFactory.js +1 -1
- package/lib/typescript/TypeScriptPlugin.js +210 -0
- package/lib/url/URLParserPlugin.js +2 -2
- package/lib/util/AsyncQueue.js +2 -2
- package/lib/util/Hash.js +2 -2
- package/lib/util/LocConverter.js +53 -0
- package/lib/util/SortableSet.js +1 -1
- package/lib/util/cleverMerge.js +2 -2
- package/lib/util/comparators.js +3 -3
- package/lib/util/concatenate.js +3 -3
- package/lib/util/conventions.js +42 -1
- package/lib/util/createMappings.js +118 -0
- package/lib/{formatLocation.js → util/formatLocation.js} +2 -2
- package/lib/{SizeFormatHelpers.js → util/formatSize.js} +3 -1
- package/lib/util/fs.js +8 -8
- package/lib/util/hash/md4.js +1 -1
- package/lib/util/hash/xxhash64.js +1 -1
- package/lib/util/identifier.js +48 -0
- package/lib/util/internalSerializables.js +35 -19
- package/lib/util/magicComment.js +10 -7
- package/lib/util/parseJson.js +2 -73
- package/lib/util/source.js +21 -0
- package/lib/util/topologicalSort.js +69 -0
- package/lib/wasm-async/AsyncWebAssemblyModulesPlugin.js +3 -4
- package/lib/wasm-async/AsyncWebAssemblyParser.js +1 -1
- package/lib/wasm-sync/UnsupportedWebAssemblyFeatureError.js +5 -3
- package/lib/wasm-sync/WasmFinalizeExportsPlugin.js +1 -1
- package/lib/wasm-sync/WebAssemblyInInitialChunkError.js +5 -3
- package/lib/webpack.js +3 -1
- package/package.json +24 -22
- package/schemas/WebpackOptions.check.js +1 -1
- package/schemas/WebpackOptions.json +129 -12
- package/schemas/plugins/{DllPlugin.check.d.ts → HtmlGeneratorOptions.check.d.ts} +1 -1
- package/schemas/plugins/HtmlGeneratorOptions.check.js +6 -0
- package/schemas/plugins/HtmlGeneratorOptions.json +3 -0
- package/schemas/plugins/ProgressPlugin.check.js +1 -1
- package/schemas/plugins/ProgressPlugin.json +22 -0
- package/schemas/plugins/container/ContainerReferencePlugin.check.js +1 -1
- package/schemas/plugins/container/ContainerReferencePlugin.json +1 -0
- package/schemas/plugins/container/ExternalsType.check.js +1 -1
- package/schemas/plugins/container/ModuleFederationPlugin.check.js +1 -1
- package/schemas/plugins/container/ModuleFederationPlugin.json +1 -0
- package/schemas/plugins/{DllReferencePlugin.check.d.ts → css/CssAutoOrModuleParserOptions.check.d.ts} +1 -1
- package/schemas/plugins/css/CssAutoOrModuleParserOptions.check.js +6 -0
- package/schemas/plugins/css/CssAutoOrModuleParserOptions.json +3 -0
- package/schemas/plugins/dll/DllPlugin.check.d.ts +7 -0
- package/schemas/plugins/dll/DllReferencePlugin.check.d.ts +7 -0
- package/types.d.ts +1153 -233
- package/lib/CaseSensitiveModulesWarning.js +0 -80
- package/lib/GraphHelpers.js +0 -49
- package/lib/NoModeWarning.js +0 -23
- package/lib/css/CssMergeStyleSheetsRuntimeModule.js +0 -57
- /package/lib/{AbstractMethodError.js → errors/AbstractMethodError.js} +0 -0
- /package/schemas/plugins/{DllPlugin.check.js → dll/DllPlugin.check.js} +0 -0
- /package/schemas/plugins/{DllPlugin.json → dll/DllPlugin.json} +0 -0
- /package/schemas/plugins/{DllReferencePlugin.check.js → dll/DllReferencePlugin.check.js} +0 -0
- /package/schemas/plugins/{DllReferencePlugin.json → dll/DllReferencePlugin.json} +0 -0
|
@@ -0,0 +1,3249 @@
|
|
|
1
|
+
/*
|
|
2
|
+
MIT License http://www.opensource.org/licenses/mit-license.php
|
|
3
|
+
Author Raj Aryan (based on SWC parser by Alexander Akait)
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
"use strict";
|
|
7
|
+
|
|
8
|
+
// cspell:ignore apos notpre noncharacters DFFF
|
|
9
|
+
|
|
10
|
+
// #region html entities
|
|
11
|
+
// The contents of this region are auto-generated by
|
|
12
|
+
// `tooling/generate-html-entities.js` from `tooling/html-entities.json`.
|
|
13
|
+
// Do not edit by hand — re-run the generator (via `yarn fix:special`) to refresh.
|
|
14
|
+
//
|
|
15
|
+
// WHATWG named character references. Keys are entity names WITHOUT the
|
|
16
|
+
// leading `&` (some end with `;`, others omit it for legacy entities that
|
|
17
|
+
// match without a closing semicolon). Values are the decoded character
|
|
18
|
+
// strings (1–2 UTF-16 code units).
|
|
19
|
+
// Built on a null prototype so bracket lookups (`HTML_ENTITIES[name]`)
|
|
20
|
+
// can't be poisoned by inherited `Object.prototype` keys like `toString`,
|
|
21
|
+
// `constructor`, or `__proto__` — without this, `&toString;` would falsely
|
|
22
|
+
// look like a matched named character reference.
|
|
23
|
+
// prettier-ignore
|
|
24
|
+
// cspell:disable-next-line
|
|
25
|
+
const HTML_ENTITIES = /** @type {Readonly<Record<string, string>>} */ (Object.freeze(Object.assign(Object.create(null), {"AElig":"Æ","AElig;":"Æ","AMP":"&","AMP;":"&","Aacute":"Á","Aacute;":"Á","Abreve;":"Ă","Acirc":"Â","Acirc;":"Â","Acy;":"А","Afr;":"𝔄","Agrave":"À","Agrave;":"À","Alpha;":"Α","Amacr;":"Ā","And;":"⩓","Aogon;":"Ą","Aopf;":"𝔸","ApplyFunction;":"","Aring":"Å","Aring;":"Å","Ascr;":"𝒜","Assign;":"≔","Atilde":"Ã","Atilde;":"Ã","Auml":"Ä","Auml;":"Ä","Backslash;":"∖","Barv;":"⫧","Barwed;":"⌆","Bcy;":"Б","Because;":"∵","Bernoullis;":"ℬ","Beta;":"Β","Bfr;":"𝔅","Bopf;":"𝔹","Breve;":"˘","Bscr;":"ℬ","Bumpeq;":"≎","CHcy;":"Ч","COPY":"©","COPY;":"©","Cacute;":"Ć","Cap;":"⋒","CapitalDifferentialD;":"ⅅ","Cayleys;":"ℭ","Ccaron;":"Č","Ccedil":"Ç","Ccedil;":"Ç","Ccirc;":"Ĉ","Cconint;":"∰","Cdot;":"Ċ","Cedilla;":"¸","CenterDot;":"·","Cfr;":"ℭ","Chi;":"Χ","CircleDot;":"⊙","CircleMinus;":"⊖","CirclePlus;":"⊕","CircleTimes;":"⊗","ClockwiseContourIntegral;":"∲","CloseCurlyDoubleQuote;":"”","CloseCurlyQuote;":"’","Colon;":"∷","Colone;":"⩴","Congruent;":"≡","Conint;":"∯","ContourIntegral;":"∮","Copf;":"ℂ","Coproduct;":"∐","CounterClockwiseContourIntegral;":"∳","Cross;":"⨯","Cscr;":"𝒞","Cup;":"⋓","CupCap;":"≍","DD;":"ⅅ","DDotrahd;":"⤑","DJcy;":"Ђ","DScy;":"Ѕ","DZcy;":"Џ","Dagger;":"‡","Darr;":"↡","Dashv;":"⫤","Dcaron;":"Ď","Dcy;":"Д","Del;":"∇","Delta;":"Δ","Dfr;":"𝔇","DiacriticalAcute;":"´","DiacriticalDot;":"˙","DiacriticalDoubleAcute;":"˝","DiacriticalGrave;":"`","DiacriticalTilde;":"˜","Diamond;":"⋄","DifferentialD;":"ⅆ","Dopf;":"𝔻","Dot;":"¨","DotDot;":"⃜","DotEqual;":"≐","DoubleContourIntegral;":"∯","DoubleDot;":"¨","DoubleDownArrow;":"⇓","DoubleLeftArrow;":"⇐","DoubleLeftRightArrow;":"⇔","DoubleLeftTee;":"⫤","DoubleLongLeftArrow;":"⟸","DoubleLongLeftRightArrow;":"⟺","DoubleLongRightArrow;":"⟹","DoubleRightArrow;":"⇒","DoubleRightTee;":"⊨","DoubleUpArrow;":"⇑","DoubleUpDownArrow;":"⇕","DoubleVerticalBar;":"∥","DownArrow;":"↓","DownArrowBar;":"⤓","DownArrowUpArrow;":"⇵",
"DownBreve;":"̑","DownLeftRightVector;":"⥐","DownLeftTeeVector;":"⥞","DownLeftVector;":"↽","DownLeftVectorBar;":"⥖","DownRightTeeVector;":"⥟","DownRightVector;":"⇁","DownRightVectorBar;":"⥗","DownTee;":"⊤","DownTeeArrow;":"↧","Downarrow;":"⇓","Dscr;":"𝒟","Dstrok;":"Đ","ENG;":"Ŋ","ETH":"Ð","ETH;":"Ð","Eacute":"É","Eacute;":"É","Ecaron;":"Ě","Ecirc":"Ê","Ecirc;":"Ê","Ecy;":"Э","Edot;":"Ė","Efr;":"𝔈","Egrave":"È","Egrave;":"È","Element;":"∈","Emacr;":"Ē","EmptySmallSquare;":"◻","EmptyVerySmallSquare;":"▫","Eogon;":"Ę","Eopf;":"𝔼","Epsilon;":"Ε","Equal;":"⩵","EqualTilde;":"≂","Equilibrium;":"⇌","Escr;":"ℰ","Esim;":"⩳","Eta;":"Η","Euml":"Ë","Euml;":"Ë","Exists;":"∃","ExponentialE;":"ⅇ","Fcy;":"Ф","Ffr;":"𝔉","FilledSmallSquare;":"◼","FilledVerySmallSquare;":"▪","Fopf;":"𝔽","ForAll;":"∀","Fouriertrf;":"ℱ","Fscr;":"ℱ","GJcy;":"Ѓ","GT":">","GT;":">","Gamma;":"Γ","Gammad;":"Ϝ","Gbreve;":"Ğ","Gcedil;":"Ģ","Gcirc;":"Ĝ","Gcy;":"Г","Gdot;":"Ġ","Gfr;":"𝔊","Gg;":"⋙","Gopf;":"𝔾","GreaterEqual;":"≥","GreaterEqualLess;":"⋛","GreaterFullEqual;":"≧","GreaterGreater;":"⪢","GreaterLess;":"≷","GreaterSlantEqual;":"⩾","GreaterTilde;":"≳","Gscr;":"𝒢","Gt;":"≫","HARDcy;":"Ъ","Hacek;":"ˇ","Hat;":"^","Hcirc;":"Ĥ","Hfr;":"ℌ","HilbertSpace;":"ℋ","Hopf;":"ℍ","HorizontalLine;":"─","Hscr;":"ℋ","Hstrok;":"Ħ","HumpDownHump;":"≎","HumpEqual;":"≏","IEcy;":"Е","IJlig;":"IJ","IOcy;":"Ё","Iacute":"Í","Iacute;":"Í","Icirc":"Î","Icirc;":"Î","Icy;":"И","Idot;":"İ","Ifr;":"ℑ","Igrave":"Ì","Igrave;":"Ì","Im;":"ℑ","Imacr;":"Ī","ImaginaryI;":"ⅈ","Implies;":"⇒","Int;":"∬","Integral;":"∫","Intersection;":"⋂","InvisibleComma;":"","InvisibleTimes;":"","Iogon;":"Į","Iopf;":"𝕀","Iota;":"Ι","Iscr;":"ℐ","Itilde;":"Ĩ","Iukcy;":"І","Iuml":"Ï","Iuml;":"Ï","Jcirc;":"Ĵ","Jcy;":"Й","Jfr;":"𝔍","Jopf;":"𝕁","Jscr;":"𝒥","Jsercy;":"Ј","Jukcy;":"Є","KHcy;":"Х","KJcy;":"Ќ","Kappa;":"Κ","Kcedil;":"Ķ","Kcy;":"К","Kfr;":"𝔎","Kopf;":"𝕂","Kscr;":"𝒦","LJcy;":"Љ","LT":"<","LT;":"<","Lacute;":"Ĺ","Lambda;":"Λ","Lang;":"⟪","Laplacetrf;":"ℒ","
Larr;":"↞","Lcaron;":"Ľ","Lcedil;":"Ļ","Lcy;":"Л","LeftAngleBracket;":"⟨","LeftArrow;":"←","LeftArrowBar;":"⇤","LeftArrowRightArrow;":"⇆","LeftCeiling;":"⌈","LeftDoubleBracket;":"⟦","LeftDownTeeVector;":"⥡","LeftDownVector;":"⇃","LeftDownVectorBar;":"⥙","LeftFloor;":"⌊","LeftRightArrow;":"↔","LeftRightVector;":"⥎","LeftTee;":"⊣","LeftTeeArrow;":"↤","LeftTeeVector;":"⥚","LeftTriangle;":"⊲","LeftTriangleBar;":"⧏","LeftTriangleEqual;":"⊴","LeftUpDownVector;":"⥑","LeftUpTeeVector;":"⥠","LeftUpVector;":"↿","LeftUpVectorBar;":"⥘","LeftVector;":"↼","LeftVectorBar;":"⥒","Leftarrow;":"⇐","Leftrightarrow;":"⇔","LessEqualGreater;":"⋚","LessFullEqual;":"≦","LessGreater;":"≶","LessLess;":"⪡","LessSlantEqual;":"⩽","LessTilde;":"≲","Lfr;":"𝔏","Ll;":"⋘","Lleftarrow;":"⇚","Lmidot;":"Ŀ","LongLeftArrow;":"⟵","LongLeftRightArrow;":"⟷","LongRightArrow;":"⟶","Longleftarrow;":"⟸","Longleftrightarrow;":"⟺","Longrightarrow;":"⟹","Lopf;":"𝕃","LowerLeftArrow;":"↙","LowerRightArrow;":"↘","Lscr;":"ℒ","Lsh;":"↰","Lstrok;":"Ł","Lt;":"≪","Map;":"⤅","Mcy;":"М","MediumSpace;":" ","Mellintrf;":"ℳ","Mfr;":"𝔐","MinusPlus;":"∓","Mopf;":"𝕄","Mscr;":"ℳ","Mu;":"Μ","NJcy;":"Њ","Nacute;":"Ń","Ncaron;":"Ň","Ncedil;":"Ņ","Ncy;":"Н","NegativeMediumSpace;":"","NegativeThickSpace;":"","NegativeThinSpace;":"","NegativeVeryThinSpace;":"","NestedGreaterGreater;":"≫","NestedLessLess;":"≪","NewLine;":"\n","Nfr;":"𝔑","NoBreak;":"","NonBreakingSpace;":" 
","Nopf;":"ℕ","Not;":"⫬","NotCongruent;":"≢","NotCupCap;":"≭","NotDoubleVerticalBar;":"∦","NotElement;":"∉","NotEqual;":"≠","NotEqualTilde;":"≂̸","NotExists;":"∄","NotGreater;":"≯","NotGreaterEqual;":"≱","NotGreaterFullEqual;":"≧̸","NotGreaterGreater;":"≫̸","NotGreaterLess;":"≹","NotGreaterSlantEqual;":"⩾̸","NotGreaterTilde;":"≵","NotHumpDownHump;":"≎̸","NotHumpEqual;":"≏̸","NotLeftTriangle;":"⋪","NotLeftTriangleBar;":"⧏̸","NotLeftTriangleEqual;":"⋬","NotLess;":"≮","NotLessEqual;":"≰","NotLessGreater;":"≸","NotLessLess;":"≪̸","NotLessSlantEqual;":"⩽̸","NotLessTilde;":"≴","NotNestedGreaterGreater;":"⪢̸","NotNestedLessLess;":"⪡̸","NotPrecedes;":"⊀","NotPrecedesEqual;":"⪯̸","NotPrecedesSlantEqual;":"⋠","NotReverseElement;":"∌","NotRightTriangle;":"⋫","NotRightTriangleBar;":"⧐̸","NotRightTriangleEqual;":"⋭","NotSquareSubset;":"⊏̸","NotSquareSubsetEqual;":"⋢","NotSquareSuperset;":"⊐̸","NotSquareSupersetEqual;":"⋣","NotSubset;":"⊂⃒","NotSubsetEqual;":"⊈","NotSucceeds;":"⊁","NotSucceedsEqual;":"⪰̸","NotSucceedsSlantEqual;":"⋡","NotSucceedsTilde;":"≿̸","NotSuperset;":"⊃⃒","NotSupersetEqual;":"⊉","NotTilde;":"≁","NotTildeEqual;":"≄","NotTildeFullEqual;":"≇","NotTildeTilde;":"≉","NotVerticalBar;":"∤","Nscr;":"𝒩","Ntilde":"Ñ","Ntilde;":"Ñ","Nu;":"Ν","OElig;":"Œ","Oacute":"Ó","Oacute;":"Ó","Ocirc":"Ô","Ocirc;":"Ô","Ocy;":"О","Odblac;":"Ő","Ofr;":"𝔒","Ograve":"Ò","Ograve;":"Ò","Omacr;":"Ō","Omega;":"Ω","Omicron;":"Ο","Oopf;":"𝕆","OpenCurlyDoubleQuote;":"“","OpenCurlyQuote;":"‘","Or;":"⩔","Oscr;":"𝒪","Oslash":"Ø","Oslash;":"Ø","Otilde":"Õ","Otilde;":"Õ","Otimes;":"⨷","Ouml":"Ö","Ouml;":"Ö","OverBar;":"‾","OverBrace;":"⏞","OverBracket;":"⎴","OverParenthesis;":"⏜","PartialD;":"∂","Pcy;":"П","Pfr;":"𝔓","Phi;":"Φ","Pi;":"Π","PlusMinus;":"±","Poincareplane;":"ℌ","Popf;":"ℙ","Pr;":"⪻","Precedes;":"≺","PrecedesEqual;":"⪯","PrecedesSlantEqual;":"≼","PrecedesTilde;":"≾","Prime;":"″","Product;":"∏","Proportion;":"∷","Proportional;":"∝","Pscr;":"𝒫","Psi;":"Ψ","QUOT":"\"","QUOT;":"\"","Qfr;"
:"𝔔","Qopf;":"ℚ","Qscr;":"𝒬","RBarr;":"⤐","REG":"®","REG;":"®","Racute;":"Ŕ","Rang;":"⟫","Rarr;":"↠","Rarrtl;":"⤖","Rcaron;":"Ř","Rcedil;":"Ŗ","Rcy;":"Р","Re;":"ℜ","ReverseElement;":"∋","ReverseEquilibrium;":"⇋","ReverseUpEquilibrium;":"⥯","Rfr;":"ℜ","Rho;":"Ρ","RightAngleBracket;":"⟩","RightArrow;":"→","RightArrowBar;":"⇥","RightArrowLeftArrow;":"⇄","RightCeiling;":"⌉","RightDoubleBracket;":"⟧","RightDownTeeVector;":"⥝","RightDownVector;":"⇂","RightDownVectorBar;":"⥕","RightFloor;":"⌋","RightTee;":"⊢","RightTeeArrow;":"↦","RightTeeVector;":"⥛","RightTriangle;":"⊳","RightTriangleBar;":"⧐","RightTriangleEqual;":"⊵","RightUpDownVector;":"⥏","RightUpTeeVector;":"⥜","RightUpVector;":"↾","RightUpVectorBar;":"⥔","RightVector;":"⇀","RightVectorBar;":"⥓","Rightarrow;":"⇒","Ropf;":"ℝ","RoundImplies;":"⥰","Rrightarrow;":"⇛","Rscr;":"ℛ","Rsh;":"↱","RuleDelayed;":"⧴","SHCHcy;":"Щ","SHcy;":"Ш","SOFTcy;":"Ь","Sacute;":"Ś","Sc;":"⪼","Scaron;":"Š","Scedil;":"Ş","Scirc;":"Ŝ","Scy;":"С","Sfr;":"𝔖","ShortDownArrow;":"↓","ShortLeftArrow;":"←","ShortRightArrow;":"→","ShortUpArrow;":"↑","Sigma;":"Σ","SmallCircle;":"∘","Sopf;":"𝕊","Sqrt;":"√","Square;":"□","SquareIntersection;":"⊓","SquareSubset;":"⊏","SquareSubsetEqual;":"⊑","SquareSuperset;":"⊐","SquareSupersetEqual;":"⊒","SquareUnion;":"⊔","Sscr;":"𝒮","Star;":"⋆","Sub;":"⋐","Subset;":"⋐","SubsetEqual;":"⊆","Succeeds;":"≻","SucceedsEqual;":"⪰","SucceedsSlantEqual;":"≽","SucceedsTilde;":"≿","SuchThat;":"∋","Sum;":"∑","Sup;":"⋑","Superset;":"⊃","SupersetEqual;":"⊇","Supset;":"⋑","THORN":"Þ","THORN;":"Þ","TRADE;":"™","TSHcy;":"Ћ","TScy;":"Ц","Tab;":"\t","Tau;":"Τ","Tcaron;":"Ť","Tcedil;":"Ţ","Tcy;":"Т","Tfr;":"𝔗","Therefore;":"∴","Theta;":"Θ","ThickSpace;":" ","ThinSpace;":" 
","Tilde;":"∼","TildeEqual;":"≃","TildeFullEqual;":"≅","TildeTilde;":"≈","Topf;":"𝕋","TripleDot;":"⃛","Tscr;":"𝒯","Tstrok;":"Ŧ","Uacute":"Ú","Uacute;":"Ú","Uarr;":"↟","Uarrocir;":"⥉","Ubrcy;":"Ў","Ubreve;":"Ŭ","Ucirc":"Û","Ucirc;":"Û","Ucy;":"У","Udblac;":"Ű","Ufr;":"𝔘","Ugrave":"Ù","Ugrave;":"Ù","Umacr;":"Ū","UnderBar;":"_","UnderBrace;":"⏟","UnderBracket;":"⎵","UnderParenthesis;":"⏝","Union;":"⋃","UnionPlus;":"⊎","Uogon;":"Ų","Uopf;":"𝕌","UpArrow;":"↑","UpArrowBar;":"⤒","UpArrowDownArrow;":"⇅","UpDownArrow;":"↕","UpEquilibrium;":"⥮","UpTee;":"⊥","UpTeeArrow;":"↥","Uparrow;":"⇑","Updownarrow;":"⇕","UpperLeftArrow;":"↖","UpperRightArrow;":"↗","Upsi;":"ϒ","Upsilon;":"Υ","Uring;":"Ů","Uscr;":"𝒰","Utilde;":"Ũ","Uuml":"Ü","Uuml;":"Ü","VDash;":"⊫","Vbar;":"⫫","Vcy;":"В","Vdash;":"⊩","Vdashl;":"⫦","Vee;":"⋁","Verbar;":"‖","Vert;":"‖","VerticalBar;":"∣","VerticalLine;":"|","VerticalSeparator;":"❘","VerticalTilde;":"≀","VeryThinSpace;":" ","Vfr;":"𝔙","Vopf;":"𝕍","Vscr;":"𝒱","Vvdash;":"⊪","Wcirc;":"Ŵ","Wedge;":"⋀","Wfr;":"𝔚","Wopf;":"𝕎","Wscr;":"𝒲","Xfr;":"𝔛","Xi;":"Ξ","Xopf;":"𝕏","Xscr;":"𝒳","YAcy;":"Я","YIcy;":"Ї","YUcy;":"Ю","Yacute":"Ý","Yacute;":"Ý","Ycirc;":"Ŷ","Ycy;":"Ы","Yfr;":"𝔜","Yopf;":"𝕐","Yscr;":"𝒴","Yuml;":"Ÿ","ZHcy;":"Ж","Zacute;":"Ź","Zcaron;":"Ž","Zcy;":"З","Zdot;":"Ż","ZeroWidthSpace;":"","Zeta;":"Ζ","Zfr;":"ℨ","Zopf;":"ℤ","Zscr;":"𝒵","aacute":"á","aacute;":"á","abreve;":"ă","ac;":"∾","acE;":"∾̳","acd;":"∿","acirc":"â","acirc;":"â","acute":"´","acute;":"´","acy;":"а","aelig":"æ","aelig;":"æ","af;":"","afr;":"𝔞","agrave":"à","agrave;":"à","alefsym;":"ℵ","aleph;":"ℵ","alpha;":"α","amacr;":"ā","amalg;":"⨿","amp":"&","amp;":"&","and;":"∧","andand;":"⩕","andd;":"⩜","andslope;":"⩘","andv;":"⩚","ang;":"∠","ange;":"⦤","angle;":"∠","angmsd;":"∡","angmsdaa;":"⦨","angmsdab;":"⦩","angmsdac;":"⦪","angmsdad;":"⦫","angmsdae;":"⦬","angmsdaf;":"⦭","angmsdag;":"⦮","angmsdah;":"⦯","angrt;":"∟","angrtvb;":"⊾","angrtvbd;":"⦝","angsph;":"∢","angst;":"Å","angzarr;":"⍼","aogon;":"
ą","aopf;":"𝕒","ap;":"≈","apE;":"⩰","apacir;":"⩯","ape;":"≊","apid;":"≋","apos;":"'","approx;":"≈","approxeq;":"≊","aring":"å","aring;":"å","ascr;":"𝒶","ast;":"*","asymp;":"≈","asympeq;":"≍","atilde":"ã","atilde;":"ã","auml":"ä","auml;":"ä","awconint;":"∳","awint;":"⨑","bNot;":"⫭","backcong;":"≌","backepsilon;":"϶","backprime;":"‵","backsim;":"∽","backsimeq;":"⋍","barvee;":"⊽","barwed;":"⌅","barwedge;":"⌅","bbrk;":"⎵","bbrktbrk;":"⎶","bcong;":"≌","bcy;":"б","bdquo;":"„","becaus;":"∵","because;":"∵","bemptyv;":"⦰","bepsi;":"϶","bernou;":"ℬ","beta;":"β","beth;":"ℶ","between;":"≬","bfr;":"𝔟","bigcap;":"⋂","bigcirc;":"◯","bigcup;":"⋃","bigodot;":"⨀","bigoplus;":"⨁","bigotimes;":"⨂","bigsqcup;":"⨆","bigstar;":"★","bigtriangledown;":"▽","bigtriangleup;":"△","biguplus;":"⨄","bigvee;":"⋁","bigwedge;":"⋀","bkarow;":"⤍","blacklozenge;":"⧫","blacksquare;":"▪","blacktriangle;":"▴","blacktriangledown;":"▾","blacktriangleleft;":"◂","blacktriangleright;":"▸","blank;":"␣","blk12;":"▒","blk14;":"░","blk34;":"▓","block;":"█","bne;":"=⃥","bnequiv;":"≡⃥","bnot;":"⌐","bopf;":"𝕓","bot;":"⊥","bottom;":"⊥","bowtie;":"⋈","boxDL;":"╗","boxDR;":"╔","boxDl;":"╖","boxDr;":"╓","boxH;":"═","boxHD;":"╦","boxHU;":"╩","boxHd;":"╤","boxHu;":"╧","boxUL;":"╝","boxUR;":"╚","boxUl;":"╜","boxUr;":"╙","boxV;":"║","boxVH;":"╬","boxVL;":"╣","boxVR;":"╠","boxVh;":"╫","boxVl;":"╢","boxVr;":"╟","boxbox;":"⧉","boxdL;":"╕","boxdR;":"╒","boxdl;":"┐","boxdr;":"┌","boxh;":"─","boxhD;":"╥","boxhU;":"╨","boxhd;":"┬","boxhu;":"┴","boxminus;":"⊟","boxplus;":"⊞","boxtimes;":"⊠","boxuL;":"╛","boxuR;":"╘","boxul;":"┘","boxur;":"└","boxv;":"│","boxvH;":"╪","boxvL;":"╡","boxvR;":"╞","boxvh;":"┼","boxvl;":"┤","boxvr;":"├","bprime;":"‵","breve;":"˘","brvbar":"¦","brvbar;":"¦","bscr;":"𝒷","bsemi;":"⁏","bsim;":"∽","bsime;":"⋍","bsol;":"\\","bsolb;":"⧅","bsolhsub;":"⟈","bull;":"•","bullet;":"•","bump;":"≎","bumpE;":"⪮","bumpe;":"≏","bumpeq;":"≏","cacute;":"ć","cap;":"∩","capand;":"⩄","capbrcup;":"⩉","capcap;":"⩋","capcup;":"⩇","c
apdot;":"⩀","caps;":"∩︀","caret;":"⁁","caron;":"ˇ","ccaps;":"⩍","ccaron;":"č","ccedil":"ç","ccedil;":"ç","ccirc;":"ĉ","ccups;":"⩌","ccupssm;":"⩐","cdot;":"ċ","cedil":"¸","cedil;":"¸","cemptyv;":"⦲","cent":"¢","cent;":"¢","centerdot;":"·","cfr;":"𝔠","chcy;":"ч","check;":"✓","checkmark;":"✓","chi;":"χ","cir;":"○","cirE;":"⧃","circ;":"ˆ","circeq;":"≗","circlearrowleft;":"↺","circlearrowright;":"↻","circledR;":"®","circledS;":"Ⓢ","circledast;":"⊛","circledcirc;":"⊚","circleddash;":"⊝","cire;":"≗","cirfnint;":"⨐","cirmid;":"⫯","cirscir;":"⧂","clubs;":"♣","clubsuit;":"♣","colon;":":","colone;":"≔","coloneq;":"≔","comma;":",","commat;":"@","comp;":"∁","compfn;":"∘","complement;":"∁","complexes;":"ℂ","cong;":"≅","congdot;":"⩭","conint;":"∮","copf;":"𝕔","coprod;":"∐","copy":"©","copy;":"©","copysr;":"℗","crarr;":"↵","cross;":"✗","cscr;":"𝒸","csub;":"⫏","csube;":"⫑","csup;":"⫐","csupe;":"⫒","ctdot;":"⋯","cudarrl;":"⤸","cudarrr;":"⤵","cuepr;":"⋞","cuesc;":"⋟","cularr;":"↶","cularrp;":"⤽","cup;":"∪","cupbrcap;":"⩈","cupcap;":"⩆","cupcup;":"⩊","cupdot;":"⊍","cupor;":"⩅","cups;":"∪︀","curarr;":"↷","curarrm;":"⤼","curlyeqprec;":"⋞","curlyeqsucc;":"⋟","curlyvee;":"⋎","curlywedge;":"⋏","curren":"¤","curren;":"¤","curvearrowleft;":"↶","curvearrowright;":"↷","cuvee;":"⋎","cuwed;":"⋏","cwconint;":"∲","cwint;":"∱","cylcty;":"⌭","dArr;":"⇓","dHar;":"⥥","dagger;":"†","daleth;":"ℸ","darr;":"↓","dash;":"‐","dashv;":"⊣","dbkarow;":"⤏","dblac;":"˝","dcaron;":"ď","dcy;":"д","dd;":"ⅆ","ddagger;":"‡","ddarr;":"⇊","ddotseq;":"⩷","deg":"°","deg;":"°","delta;":"δ","demptyv;":"⦱","dfisht;":"⥿","dfr;":"𝔡","dharl;":"⇃","dharr;":"⇂","diam;":"⋄","diamond;":"⋄","diamondsuit;":"♦","diams;":"♦","die;":"¨","digamma;":"ϝ","disin;":"⋲","div;":"÷","divide":"÷","divide;":"÷","divideontimes;":"⋇","divonx;":"⋇","djcy;":"ђ","dlcorn;":"⌞","dlcrop;":"⌍","dollar;":"$","dopf;":"𝕕","dot;":"˙","doteq;":"≐","doteqdot;":"≑","dotminus;":"∸","dotplus;":"∔","dotsquare;":"⊡","doublebarwedge;":"⌆","downarrow;":"↓","downdownarr
ows;":"⇊","downharpoonleft;":"⇃","downharpoonright;":"⇂","drbkarow;":"⤐","drcorn;":"⌟","drcrop;":"⌌","dscr;":"𝒹","dscy;":"ѕ","dsol;":"⧶","dstrok;":"đ","dtdot;":"⋱","dtri;":"▿","dtrif;":"▾","duarr;":"⇵","duhar;":"⥯","dwangle;":"⦦","dzcy;":"џ","dzigrarr;":"⟿","eDDot;":"⩷","eDot;":"≑","eacute":"é","eacute;":"é","easter;":"⩮","ecaron;":"ě","ecir;":"≖","ecirc":"ê","ecirc;":"ê","ecolon;":"≕","ecy;":"э","edot;":"ė","ee;":"ⅇ","efDot;":"≒","efr;":"𝔢","eg;":"⪚","egrave":"è","egrave;":"è","egs;":"⪖","egsdot;":"⪘","el;":"⪙","elinters;":"⏧","ell;":"ℓ","els;":"⪕","elsdot;":"⪗","emacr;":"ē","empty;":"∅","emptyset;":"∅","emptyv;":"∅","emsp13;":" ","emsp14;":" ","emsp;":" ","eng;":"ŋ","ensp;":" ","eogon;":"ę","eopf;":"𝕖","epar;":"⋕","eparsl;":"⧣","eplus;":"⩱","epsi;":"ε","epsilon;":"ε","epsiv;":"ϵ","eqcirc;":"≖","eqcolon;":"≕","eqsim;":"≂","eqslantgtr;":"⪖","eqslantless;":"⪕","equals;":"=","equest;":"≟","equiv;":"≡","equivDD;":"⩸","eqvparsl;":"⧥","erDot;":"≓","erarr;":"⥱","escr;":"ℯ","esdot;":"≐","esim;":"≂","eta;":"η","eth":"ð","eth;":"ð","euml":"ë","euml;":"ë","euro;":"€","excl;":"!","exist;":"∃","expectation;":"ℰ","exponentiale;":"ⅇ","fallingdotseq;":"≒","fcy;":"ф","female;":"♀","ffilig;":"ffi","fflig;":"ff","ffllig;":"ffl","ffr;":"𝔣","filig;":"fi","fjlig;":"fj","flat;":"♭","fllig;":"fl","fltns;":"▱","fnof;":"ƒ","fopf;":"𝕗","forall;":"∀","fork;":"⋔","forkv;":"⫙","fpartint;":"⨍","frac12":"½","frac12;":"½","frac13;":"⅓","frac14":"¼","frac14;":"¼","frac15;":"⅕","frac16;":"⅙","frac18;":"⅛","frac23;":"⅔","frac25;":"⅖","frac34":"¾","frac34;":"¾","frac35;":"⅗","frac38;":"⅜","frac45;":"⅘","frac56;":"⅚","frac58;":"⅝","frac78;":"⅞","frasl;":"⁄","frown;":"⌢","fscr;":"𝒻","gE;":"≧","gEl;":"⪌","gacute;":"ǵ","gamma;":"γ","gammad;":"ϝ","gap;":"⪆","gbreve;":"ğ","gcirc;":"ĝ","gcy;":"г","gdot;":"ġ","ge;":"≥","gel;":"⋛","geq;":"≥","geqq;":"≧","geqslant;":"⩾","ges;":"⩾","gescc;":"⪩","gesdot;":"⪀","gesdoto;":"⪂","gesdotol;":"⪄","gesl;":"⋛︀","gesles;":"⪔","gfr;":"𝔤","gg;":"≫","ggg;":"⋙","gimel;":"ℷ","g
jcy;":"ѓ","gl;":"≷","glE;":"⪒","gla;":"⪥","glj;":"⪤","gnE;":"≩","gnap;":"⪊","gnapprox;":"⪊","gne;":"⪈","gneq;":"⪈","gneqq;":"≩","gnsim;":"⋧","gopf;":"𝕘","grave;":"`","gscr;":"ℊ","gsim;":"≳","gsime;":"⪎","gsiml;":"⪐","gt":">","gt;":">","gtcc;":"⪧","gtcir;":"⩺","gtdot;":"⋗","gtlPar;":"⦕","gtquest;":"⩼","gtrapprox;":"⪆","gtrarr;":"⥸","gtrdot;":"⋗","gtreqless;":"⋛","gtreqqless;":"⪌","gtrless;":"≷","gtrsim;":"≳","gvertneqq;":"≩︀","gvnE;":"≩︀","hArr;":"⇔","hairsp;":" ","half;":"½","hamilt;":"ℋ","hardcy;":"ъ","harr;":"↔","harrcir;":"⥈","harrw;":"↭","hbar;":"ℏ","hcirc;":"ĥ","hearts;":"♥","heartsuit;":"♥","hellip;":"…","hercon;":"⊹","hfr;":"𝔥","hksearow;":"⤥","hkswarow;":"⤦","hoarr;":"⇿","homtht;":"∻","hookleftarrow;":"↩","hookrightarrow;":"↪","hopf;":"𝕙","horbar;":"―","hscr;":"𝒽","hslash;":"ℏ","hstrok;":"ħ","hybull;":"⁃","hyphen;":"‐","iacute":"í","iacute;":"í","ic;":"","icirc":"î","icirc;":"î","icy;":"и","iecy;":"е","iexcl":"¡","iexcl;":"¡","iff;":"⇔","ifr;":"𝔦","igrave":"ì","igrave;":"ì","ii;":"ⅈ","iiiint;":"⨌","iiint;":"∭","iinfin;":"⧜","iiota;":"℩","ijlig;":"ij","imacr;":"ī","image;":"ℑ","imagline;":"ℐ","imagpart;":"ℑ","imath;":"ı","imof;":"⊷","imped;":"Ƶ","in;":"∈","incare;":"℅","infin;":"∞","infintie;":"⧝","inodot;":"ı","int;":"∫","intcal;":"⊺","integers;":"ℤ","intercal;":"⊺","intlarhk;":"⨗","intprod;":"⨼","iocy;":"ё","iogon;":"į","iopf;":"𝕚","iota;":"ι","iprod;":"⨼","iquest":"¿","iquest;":"¿","iscr;":"𝒾","isin;":"∈","isinE;":"⋹","isindot;":"⋵","isins;":"⋴","isinsv;":"⋳","isinv;":"∈","it;":"","itilde;":"ĩ","iukcy;":"і","iuml":"ï","iuml;":"ï","jcirc;":"ĵ","jcy;":"й","jfr;":"𝔧","jmath;":"ȷ","jopf;":"𝕛","jscr;":"𝒿","jsercy;":"ј","jukcy;":"є","kappa;":"κ","kappav;":"ϰ","kcedil;":"ķ","kcy;":"к","kfr;":"𝔨","kgreen;":"ĸ","khcy;":"х","kjcy;":"ќ","kopf;":"𝕜","kscr;":"𝓀","lAarr;":"⇚","lArr;":"⇐","lAtail;":"⤛","lBarr;":"⤎","lE;":"≦","lEg;":"⪋","lHar;":"⥢","lacute;":"ĺ","laemptyv;":"⦴","lagran;":"ℒ","lambda;":"λ","lang;":"⟨","langd;":"⦑","langle;":"⟨","lap;":"⪅","laquo":"«","laqu
o;":"«","larr;":"←","larrb;":"⇤","larrbfs;":"⤟","larrfs;":"⤝","larrhk;":"↩","larrlp;":"↫","larrpl;":"⤹","larrsim;":"⥳","larrtl;":"↢","lat;":"⪫","latail;":"⤙","late;":"⪭","lates;":"⪭︀","lbarr;":"⤌","lbbrk;":"❲","lbrace;":"{","lbrack;":"[","lbrke;":"⦋","lbrksld;":"⦏","lbrkslu;":"⦍","lcaron;":"ľ","lcedil;":"ļ","lceil;":"⌈","lcub;":"{","lcy;":"л","ldca;":"⤶","ldquo;":"“","ldquor;":"„","ldrdhar;":"⥧","ldrushar;":"⥋","ldsh;":"↲","le;":"≤","leftarrow;":"←","leftarrowtail;":"↢","leftharpoondown;":"↽","leftharpoonup;":"↼","leftleftarrows;":"⇇","leftrightarrow;":"↔","leftrightarrows;":"⇆","leftrightharpoons;":"⇋","leftrightsquigarrow;":"↭","leftthreetimes;":"⋋","leg;":"⋚","leq;":"≤","leqq;":"≦","leqslant;":"⩽","les;":"⩽","lescc;":"⪨","lesdot;":"⩿","lesdoto;":"⪁","lesdotor;":"⪃","lesg;":"⋚︀","lesges;":"⪓","lessapprox;":"⪅","lessdot;":"⋖","lesseqgtr;":"⋚","lesseqqgtr;":"⪋","lessgtr;":"≶","lesssim;":"≲","lfisht;":"⥼","lfloor;":"⌊","lfr;":"𝔩","lg;":"≶","lgE;":"⪑","lhard;":"↽","lharu;":"↼","lharul;":"⥪","lhblk;":"▄","ljcy;":"љ","ll;":"≪","llarr;":"⇇","llcorner;":"⌞","llhard;":"⥫","lltri;":"◺","lmidot;":"ŀ","lmoust;":"⎰","lmoustache;":"⎰","lnE;":"≨","lnap;":"⪉","lnapprox;":"⪉","lne;":"⪇","lneq;":"⪇","lneqq;":"≨","lnsim;":"⋦","loang;":"⟬","loarr;":"⇽","lobrk;":"⟦","longleftarrow;":"⟵","longleftrightarrow;":"⟷","longmapsto;":"⟼","longrightarrow;":"⟶","looparrowleft;":"↫","looparrowright;":"↬","lopar;":"⦅","lopf;":"𝕝","loplus;":"⨭","lotimes;":"⨴","lowast;":"∗","lowbar;":"_","loz;":"◊","lozenge;":"◊","lozf;":"⧫","lpar;":"(","lparlt;":"⦓","lrarr;":"⇆","lrcorner;":"⌟","lrhar;":"⇋","lrhard;":"⥭","lrm;":"","lrtri;":"⊿","lsaquo;":"‹","lscr;":"𝓁","lsh;":"↰","lsim;":"≲","lsime;":"⪍","lsimg;":"⪏","lsqb;":"[","lsquo;":"‘","lsquor;":"‚","lstrok;":"ł","lt":"<","lt;":"<","ltcc;":"⪦","ltcir;":"⩹","ltdot;":"⋖","lthree;":"⋋","ltimes;":"⋉","ltlarr;":"⥶","ltquest;":"⩻","ltrPar;":"⦖","ltri;":"◃","ltrie;":"⊴","ltrif;":"◂","lurdshar;":"⥊","luruhar;":"⥦","lvertneqq;":"≨︀","lvnE;":"≨︀","mDDot;":"∺","macr":"
¯","macr;":"¯","male;":"♂","malt;":"✠","maltese;":"✠","map;":"↦","mapsto;":"↦","mapstodown;":"↧","mapstoleft;":"↤","mapstoup;":"↥","marker;":"▮","mcomma;":"⨩","mcy;":"м","mdash;":"—","measuredangle;":"∡","mfr;":"𝔪","mho;":"℧","micro":"µ","micro;":"µ","mid;":"∣","midast;":"*","midcir;":"⫰","middot":"·","middot;":"·","minus;":"−","minusb;":"⊟","minusd;":"∸","minusdu;":"⨪","mlcp;":"⫛","mldr;":"…","mnplus;":"∓","models;":"⊧","mopf;":"𝕞","mp;":"∓","mscr;":"𝓂","mstpos;":"∾","mu;":"μ","multimap;":"⊸","mumap;":"⊸","nGg;":"⋙̸","nGt;":"≫⃒","nGtv;":"≫̸","nLeftarrow;":"⇍","nLeftrightarrow;":"⇎","nLl;":"⋘̸","nLt;":"≪⃒","nLtv;":"≪̸","nRightarrow;":"⇏","nVDash;":"⊯","nVdash;":"⊮","nabla;":"∇","nacute;":"ń","nang;":"∠⃒","nap;":"≉","napE;":"⩰̸","napid;":"≋̸","napos;":"ʼn","napprox;":"≉","natur;":"♮","natural;":"♮","naturals;":"ℕ","nbsp":" ","nbsp;":" ","nbump;":"≎̸","nbumpe;":"≏̸","ncap;":"⩃","ncaron;":"ň","ncedil;":"ņ","ncong;":"≇","ncongdot;":"⩭̸","ncup;":"⩂","ncy;":"н","ndash;":"–","ne;":"≠","neArr;":"⇗","nearhk;":"⤤","nearr;":"↗","nearrow;":"↗","nedot;":"≐̸","nequiv;":"≢","nesear;":"⤨","nesim;":"≂̸","nexist;":"∄","nexists;":"∄","nfr;":"𝔫","ngE;":"≧̸","nge;":"≱","ngeq;":"≱","ngeqq;":"≧̸","ngeqslant;":"⩾̸","nges;":"⩾̸","ngsim;":"≵","ngt;":"≯","ngtr;":"≯","nhArr;":"⇎","nharr;":"↮","nhpar;":"⫲","ni;":"∋","nis;":"⋼","nisd;":"⋺","niv;":"∋","njcy;":"њ","nlArr;":"⇍","nlE;":"≦̸","nlarr;":"↚","nldr;":"‥","nle;":"≰","nleftarrow;":"↚","nleftrightarrow;":"↮","nleq;":"≰","nleqq;":"≦̸","nleqslant;":"⩽̸","nles;":"⩽̸","nless;":"≮","nlsim;":"≴","nlt;":"≮","nltri;":"⋪","nltrie;":"⋬","nmid;":"∤","nopf;":"𝕟","not":"¬","not;":"¬","notin;":"∉","notinE;":"⋹̸","notindot;":"⋵̸","notinva;":"∉","notinvb;":"⋷","notinvc;":"⋶","notni;":"∌","notniva;":"∌","notnivb;":"⋾","notnivc;":"⋽","npar;":"∦","nparallel;":"∦","nparsl;":"⫽⃥","npart;":"∂̸","npolint;":"⨔","npr;":"⊀","nprcue;":"⋠","npre;":"⪯̸","nprec;":"⊀","npreceq;":"⪯̸","nrArr;":"⇏","nrarr;":"↛","nrarrc;":"⤳̸","nrarrw;":"↝̸","nrightarrow;":"↛","nrtri;":"⋫","
nrtrie;":"⋭","nsc;":"⊁","nsccue;":"⋡","nsce;":"⪰̸","nscr;":"𝓃","nshortmid;":"∤","nshortparallel;":"∦","nsim;":"≁","nsime;":"≄","nsimeq;":"≄","nsmid;":"∤","nspar;":"∦","nsqsube;":"⋢","nsqsupe;":"⋣","nsub;":"⊄","nsubE;":"⫅̸","nsube;":"⊈","nsubset;":"⊂⃒","nsubseteq;":"⊈","nsubseteqq;":"⫅̸","nsucc;":"⊁","nsucceq;":"⪰̸","nsup;":"⊅","nsupE;":"⫆̸","nsupe;":"⊉","nsupset;":"⊃⃒","nsupseteq;":"⊉","nsupseteqq;":"⫆̸","ntgl;":"≹","ntilde":"ñ","ntilde;":"ñ","ntlg;":"≸","ntriangleleft;":"⋪","ntrianglelefteq;":"⋬","ntriangleright;":"⋫","ntrianglerighteq;":"⋭","nu;":"ν","num;":"#","numero;":"№","numsp;":" ","nvDash;":"⊭","nvHarr;":"⤄","nvap;":"≍⃒","nvdash;":"⊬","nvge;":"≥⃒","nvgt;":">⃒","nvinfin;":"⧞","nvlArr;":"⤂","nvle;":"≤⃒","nvlt;":"<⃒","nvltrie;":"⊴⃒","nvrArr;":"⤃","nvrtrie;":"⊵⃒","nvsim;":"∼⃒","nwArr;":"⇖","nwarhk;":"⤣","nwarr;":"↖","nwarrow;":"↖","nwnear;":"⤧","oS;":"Ⓢ","oacute":"ó","oacute;":"ó","oast;":"⊛","ocir;":"⊚","ocirc":"ô","ocirc;":"ô","ocy;":"о","odash;":"⊝","odblac;":"ő","odiv;":"⨸","odot;":"⊙","odsold;":"⦼","oelig;":"œ","ofcir;":"⦿","ofr;":"𝔬","ogon;":"˛","ograve":"ò","ograve;":"ò","ogt;":"⧁","ohbar;":"⦵","ohm;":"Ω","oint;":"∮","olarr;":"↺","olcir;":"⦾","olcross;":"⦻","oline;":"‾","olt;":"⧀","omacr;":"ō","omega;":"ω","omicron;":"ο","omid;":"⦶","ominus;":"⊖","oopf;":"𝕠","opar;":"⦷","operp;":"⦹","oplus;":"⊕","or;":"∨","orarr;":"↻","ord;":"⩝","order;":"ℴ","orderof;":"ℴ","ordf":"ª","ordf;":"ª","ordm":"º","ordm;":"º","origof;":"⊶","oror;":"⩖","orslope;":"⩗","orv;":"⩛","oscr;":"ℴ","oslash":"ø","oslash;":"ø","osol;":"⊘","otilde":"õ","otilde;":"õ","otimes;":"⊗","otimesas;":"⨶","ouml":"ö","ouml;":"ö","ovbar;":"⌽","par;":"∥","para":"¶","para;":"¶","parallel;":"∥","parsim;":"⫳","parsl;":"⫽","part;":"∂","pcy;":"п","percnt;":"%","period;":".","permil;":"‰","perp;":"⊥","pertenk;":"‱","pfr;":"𝔭","phi;":"φ","phiv;":"ϕ","phmmat;":"ℳ","phone;":"☎","pi;":"π","pitchfork;":"⋔","piv;":"ϖ","planck;":"ℏ","planckh;":"ℎ","plankv;":"ℏ","plus;":"+","plusacir;":"⨣","plusb;":"⊞","pluscir;":"⨢",
"plusdo;":"∔","plusdu;":"⨥","pluse;":"⩲","plusmn":"±","plusmn;":"±","plussim;":"⨦","plustwo;":"⨧","pm;":"±","pointint;":"⨕","popf;":"𝕡","pound":"£","pound;":"£","pr;":"≺","prE;":"⪳","prap;":"⪷","prcue;":"≼","pre;":"⪯","prec;":"≺","precapprox;":"⪷","preccurlyeq;":"≼","preceq;":"⪯","precnapprox;":"⪹","precneqq;":"⪵","precnsim;":"⋨","precsim;":"≾","prime;":"′","primes;":"ℙ","prnE;":"⪵","prnap;":"⪹","prnsim;":"⋨","prod;":"∏","profalar;":"⌮","profline;":"⌒","profsurf;":"⌓","prop;":"∝","propto;":"∝","prsim;":"≾","prurel;":"⊰","pscr;":"𝓅","psi;":"ψ","puncsp;":" ","qfr;":"𝔮","qint;":"⨌","qopf;":"𝕢","qprime;":"⁗","qscr;":"𝓆","quaternions;":"ℍ","quatint;":"⨖","quest;":"?","questeq;":"≟","quot":"\"","quot;":"\"","rAarr;":"⇛","rArr;":"⇒","rAtail;":"⤜","rBarr;":"⤏","rHar;":"⥤","race;":"∽̱","racute;":"ŕ","radic;":"√","raemptyv;":"⦳","rang;":"⟩","rangd;":"⦒","range;":"⦥","rangle;":"⟩","raquo":"»","raquo;":"»","rarr;":"→","rarrap;":"⥵","rarrb;":"⇥","rarrbfs;":"⤠","rarrc;":"⤳","rarrfs;":"⤞","rarrhk;":"↪","rarrlp;":"↬","rarrpl;":"⥅","rarrsim;":"⥴","rarrtl;":"↣","rarrw;":"↝","ratail;":"⤚","ratio;":"∶","rationals;":"ℚ","rbarr;":"⤍","rbbrk;":"❳","rbrace;":"}","rbrack;":"]","rbrke;":"⦌","rbrksld;":"⦎","rbrkslu;":"⦐","rcaron;":"ř","rcedil;":"ŗ","rceil;":"⌉","rcub;":"}","rcy;":"р","rdca;":"⤷","rdldhar;":"⥩","rdquo;":"”","rdquor;":"”","rdsh;":"↳","real;":"ℜ","realine;":"ℛ","realpart;":"ℜ","reals;":"ℝ","rect;":"▭","reg":"®","reg;":"®","rfisht;":"⥽","rfloor;":"⌋","rfr;":"𝔯","rhard;":"⇁","rharu;":"⇀","rharul;":"⥬","rho;":"ρ","rhov;":"ϱ","rightarrow;":"→","rightarrowtail;":"↣","rightharpoondown;":"⇁","rightharpoonup;":"⇀","rightleftarrows;":"⇄","rightleftharpoons;":"⇌","rightrightarrows;":"⇉","rightsquigarrow;":"↝","rightthreetimes;":"⋌","ring;":"˚","risingdotseq;":"≓","rlarr;":"⇄","rlhar;":"⇌","rlm;":"","rmoust;":"⎱","rmoustache;":"⎱","rnmid;":"⫮","roang;":"⟭","roarr;":"⇾","robrk;":"⟧","ropar;":"⦆","ropf;":"𝕣","roplus;":"⨮","rotimes;":"⨵","rpar;":")","rpargt;":"⦔","rppolint;":"⨒","rrarr;":"⇉",
"rsaquo;":"›","rscr;":"𝓇","rsh;":"↱","rsqb;":"]","rsquo;":"’","rsquor;":"’","rthree;":"⋌","rtimes;":"⋊","rtri;":"▹","rtrie;":"⊵","rtrif;":"▸","rtriltri;":"⧎","ruluhar;":"⥨","rx;":"℞","sacute;":"ś","sbquo;":"‚","sc;":"≻","scE;":"⪴","scap;":"⪸","scaron;":"š","sccue;":"≽","sce;":"⪰","scedil;":"ş","scirc;":"ŝ","scnE;":"⪶","scnap;":"⪺","scnsim;":"⋩","scpolint;":"⨓","scsim;":"≿","scy;":"с","sdot;":"⋅","sdotb;":"⊡","sdote;":"⩦","seArr;":"⇘","searhk;":"⤥","searr;":"↘","searrow;":"↘","sect":"§","sect;":"§","semi;":";","seswar;":"⤩","setminus;":"∖","setmn;":"∖","sext;":"✶","sfr;":"𝔰","sfrown;":"⌢","sharp;":"♯","shchcy;":"щ","shcy;":"ш","shortmid;":"∣","shortparallel;":"∥","shy":"","shy;":"","sigma;":"σ","sigmaf;":"ς","sigmav;":"ς","sim;":"∼","simdot;":"⩪","sime;":"≃","simeq;":"≃","simg;":"⪞","simgE;":"⪠","siml;":"⪝","simlE;":"⪟","simne;":"≆","simplus;":"⨤","simrarr;":"⥲","slarr;":"←","smallsetminus;":"∖","smashp;":"⨳","smeparsl;":"⧤","smid;":"∣","smile;":"⌣","smt;":"⪪","smte;":"⪬","smtes;":"⪬︀","softcy;":"ь","sol;":"/","solb;":"⧄","solbar;":"⌿","sopf;":"𝕤","spades;":"♠","spadesuit;":"♠","spar;":"∥","sqcap;":"⊓","sqcaps;":"⊓︀","sqcup;":"⊔","sqcups;":"⊔︀","sqsub;":"⊏","sqsube;":"⊑","sqsubset;":"⊏","sqsubseteq;":"⊑","sqsup;":"⊐","sqsupe;":"⊒","sqsupset;":"⊐","sqsupseteq;":"⊒","squ;":"□","square;":"□","squarf;":"▪","squf;":"▪","srarr;":"→","sscr;":"𝓈","ssetmn;":"∖","ssmile;":"⌣","sstarf;":"⋆","star;":"☆","starf;":"★","straightepsilon;":"ϵ","straightphi;":"ϕ","strns;":"¯","sub;":"⊂","subE;":"⫅","subdot;":"⪽","sube;":"⊆","subedot;":"⫃","submult;":"⫁","subnE;":"⫋","subne;":"⊊","subplus;":"⪿","subrarr;":"⥹","subset;":"⊂","subseteq;":"⊆","subseteqq;":"⫅","subsetneq;":"⊊","subsetneqq;":"⫋","subsim;":"⫇","subsub;":"⫕","subsup;":"⫓","succ;":"≻","succapprox;":"⪸","succcurlyeq;":"≽","succeq;":"⪰","succnapprox;":"⪺","succneqq;":"⪶","succnsim;":"⋩","succsim;":"≿","sum;":"∑","sung;":"♪","sup1":"¹","sup1;":"¹","sup2":"²","sup2;":"²","sup3":"³","sup3;":"³","sup;":"⊃","supE;":"⫆","supdot;":"⪾","
supdsub;":"⫘","supe;":"⊇","supedot;":"⫄","suphsol;":"⟉","suphsub;":"⫗","suplarr;":"⥻","supmult;":"⫂","supnE;":"⫌","supne;":"⊋","supplus;":"⫀","supset;":"⊃","supseteq;":"⊇","supseteqq;":"⫆","supsetneq;":"⊋","supsetneqq;":"⫌","supsim;":"⫈","supsub;":"⫔","supsup;":"⫖","swArr;":"⇙","swarhk;":"⤦","swarr;":"↙","swarrow;":"↙","swnwar;":"⤪","szlig":"ß","szlig;":"ß","target;":"⌖","tau;":"τ","tbrk;":"⎴","tcaron;":"ť","tcedil;":"ţ","tcy;":"т","tdot;":"⃛","telrec;":"⌕","tfr;":"𝔱","there4;":"∴","therefore;":"∴","theta;":"θ","thetasym;":"ϑ","thetav;":"ϑ","thickapprox;":"≈","thicksim;":"∼","thinsp;":" ","thkap;":"≈","thksim;":"∼","thorn":"þ","thorn;":"þ","tilde;":"˜","times":"×","times;":"×","timesb;":"⊠","timesbar;":"⨱","timesd;":"⨰","tint;":"∭","toea;":"⤨","top;":"⊤","topbot;":"⌶","topcir;":"⫱","topf;":"𝕥","topfork;":"⫚","tosa;":"⤩","tprime;":"‴","trade;":"™","triangle;":"▵","triangledown;":"▿","triangleleft;":"◃","trianglelefteq;":"⊴","triangleq;":"≜","triangleright;":"▹","trianglerighteq;":"⊵","tridot;":"◬","trie;":"≜","triminus;":"⨺","triplus;":"⨹","trisb;":"⧍","tritime;":"⨻","trpezium;":"⏢","tscr;":"𝓉","tscy;":"ц","tshcy;":"ћ","tstrok;":"ŧ","twixt;":"≬","twoheadleftarrow;":"↞","twoheadrightarrow;":"↠","uArr;":"⇑","uHar;":"⥣","uacute":"ú","uacute;":"ú","uarr;":"↑","ubrcy;":"ў","ubreve;":"ŭ","ucirc":"û","ucirc;":"û","ucy;":"у","udarr;":"⇅","udblac;":"ű","udhar;":"⥮","ufisht;":"⥾","ufr;":"𝔲","ugrave":"ù","ugrave;":"ù","uharl;":"↿","uharr;":"↾","uhblk;":"▀","ulcorn;":"⌜","ulcorner;":"⌜","ulcrop;":"⌏","ultri;":"◸","umacr;":"ū","uml":"¨","uml;":"¨","uogon;":"ų","uopf;":"𝕦","uparrow;":"↑","updownarrow;":"↕","upharpoonleft;":"↿","upharpoonright;":"↾","uplus;":"⊎","upsi;":"υ","upsih;":"ϒ","upsilon;":"υ","upuparrows;":"⇈","urcorn;":"⌝","urcorner;":"⌝","urcrop;":"⌎","uring;":"ů","urtri;":"◹","uscr;":"𝓊","utdot;":"⋰","utilde;":"ũ","utri;":"▵","utrif;":"▴","uuarr;":"⇈","uuml":"ü","uuml;":"ü","uwangle;":"⦧","vArr;":"⇕","vBar;":"⫨","vBarv;":"⫩","vDash;":"⊨","vangrt;":"⦜","varepsilon;":"ϵ",
"varkappa;":"ϰ","varnothing;":"∅","varphi;":"ϕ","varpi;":"ϖ","varpropto;":"∝","varr;":"↕","varrho;":"ϱ","varsigma;":"ς","varsubsetneq;":"⊊︀","varsubsetneqq;":"⫋︀","varsupsetneq;":"⊋︀","varsupsetneqq;":"⫌︀","vartheta;":"ϑ","vartriangleleft;":"⊲","vartriangleright;":"⊳","vcy;":"в","vdash;":"⊢","vee;":"∨","veebar;":"⊻","veeeq;":"≚","vellip;":"⋮","verbar;":"|","vert;":"|","vfr;":"𝔳","vltri;":"⊲","vnsub;":"⊂⃒","vnsup;":"⊃⃒","vopf;":"𝕧","vprop;":"∝","vrtri;":"⊳","vscr;":"𝓋","vsubnE;":"⫋︀","vsubne;":"⊊︀","vsupnE;":"⫌︀","vsupne;":"⊋︀","vzigzag;":"⦚","wcirc;":"ŵ","wedbar;":"⩟","wedge;":"∧","wedgeq;":"≙","weierp;":"℘","wfr;":"𝔴","wopf;":"𝕨","wp;":"℘","wr;":"≀","wreath;":"≀","wscr;":"𝓌","xcap;":"⋂","xcirc;":"◯","xcup;":"⋃","xdtri;":"▽","xfr;":"𝔵","xhArr;":"⟺","xharr;":"⟷","xi;":"ξ","xlArr;":"⟸","xlarr;":"⟵","xmap;":"⟼","xnis;":"⋻","xodot;":"⨀","xopf;":"𝕩","xoplus;":"⨁","xotime;":"⨂","xrArr;":"⟹","xrarr;":"⟶","xscr;":"𝓍","xsqcup;":"⨆","xuplus;":"⨄","xutri;":"△","xvee;":"⋁","xwedge;":"⋀","yacute":"ý","yacute;":"ý","yacy;":"я","ycirc;":"ŷ","ycy;":"ы","yen":"¥","yen;":"¥","yfr;":"𝔶","yicy;":"ї","yopf;":"𝕪","yscr;":"𝓎","yucy;":"ю","yuml":"ÿ","yuml;":"ÿ","zacute;":"ź","zcaron;":"ž","zcy;":"з","zdot;":"ż","zeetrf;":"ℨ","zeta;":"ζ","zfr;":"𝔷","zhcy;":"ж","zigrarr;":"⇝","zopf;":"𝕫","zscr;":"𝓏","zwj;":"","zwnj;":""})));
|
|
26
|
+
// #endregion
|
|
27
|
+
|
|
28
|
+
// Tokenizer state identifiers: one distinct integer (0..79) per state. They
// are the values held by the `state` and `returnState` variables inside
// `walkHtmlTokens`. Names mirror the state names of the WHATWG HTML
// tokenizer that the `switch` in `walkHtmlTokens` links to.

// Data, tag and attribute states.
const STATE_DATA = 0;
const STATE_TAG_OPEN = 1;
const STATE_END_TAG_OPEN = 2;
const STATE_TAG_NAME = 3;
const STATE_BEFORE_ATTRIBUTE_NAME = 4;
const STATE_ATTRIBUTE_NAME = 5;
const STATE_AFTER_ATTRIBUTE_NAME = 6;
const STATE_BEFORE_ATTRIBUTE_VALUE = 7;
const STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED = 8;
const STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED = 9;
const STATE_ATTRIBUTE_VALUE_UNQUOTED = 10;
const STATE_AFTER_ATTRIBUTE_VALUE_QUOTED = 11;
const STATE_SELF_CLOSING_START_TAG = 12;

// Markup declaration (`<!`) and comment states.
const STATE_MARKUP_DECLARATION_OPEN = 13;
const STATE_COMMENT_START = 14;
const STATE_COMMENT_START_DASH = 15;
const STATE_COMMENT = 16;
const STATE_COMMENT_END_DASH = 17;
const STATE_COMMENT_END = 18;
const STATE_COMMENT_END_BANG = 19;
const STATE_BOGUS_COMMENT = 20;

// Comment less-than-sign states.
const STATE_COMMENT_LESS_THAN_SIGN = 21;
const STATE_COMMENT_LESS_THAN_SIGN_BANG = 22;
const STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH = 23;
const STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH = 24;

// DOCTYPE states.
const STATE_DOCTYPE = 25;
const STATE_BEFORE_DOCTYPE_NAME = 26;
const STATE_DOCTYPE_NAME = 27;
const STATE_AFTER_DOCTYPE_NAME = 28;
const STATE_AFTER_DOCTYPE_PUBLIC_KEYWORD = 29;
const STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 30;
const STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 31;
const STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 32;
const STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 33;
const STATE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 34;
const STATE_AFTER_DOCTYPE_SYSTEM_KEYWORD = 35;
const STATE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 36;
const STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 37;
const STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 38;
const STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 39;
const STATE_BOGUS_DOCTYPE = 40;

// CDATA section states.
const STATE_CDATA_SECTION = 41;
const STATE_CDATA_SECTION_BRACKET = 42;
const STATE_CDATA_SECTION_END = 43;

// RCDATA states (selected for `<textarea>` / `<title>` by
// `getContentModeForTag`).
const STATE_RCDATA = 44;
const STATE_RCDATA_LESS_THAN_SIGN = 45;
const STATE_RCDATA_END_TAG_OPEN = 46;
const STATE_RCDATA_END_TAG_NAME = 47;

// RAWTEXT states (selected for `<style>`, `<xmp>`, `<iframe>`, `<noembed>`
// and `<noframes>` by `getContentModeForTag`).
const STATE_RAWTEXT = 48;
const STATE_RAWTEXT_LESS_THAN_SIGN = 49;
const STATE_RAWTEXT_END_TAG_OPEN = 50;
const STATE_RAWTEXT_END_TAG_NAME = 51;

// Script data states (selected for `<script>` by `getContentModeForTag`).
const STATE_SCRIPT_DATA = 52;
const STATE_SCRIPT_DATA_LESS_THAN_SIGN = 53;
const STATE_SCRIPT_DATA_END_TAG_OPEN = 54;
const STATE_SCRIPT_DATA_END_TAG_NAME = 55;
const STATE_SCRIPT_DATA_ESCAPE_START = 56;
const STATE_SCRIPT_DATA_ESCAPE_START_DASH = 57;
const STATE_SCRIPT_DATA_ESCAPED = 58;
const STATE_SCRIPT_DATA_ESCAPED_DASH = 59;
const STATE_SCRIPT_DATA_ESCAPED_DASH_DASH = 60;
const STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 61;
const STATE_SCRIPT_DATA_ESCAPED_END_TAG_OPEN = 62;
const STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME = 63;
const STATE_SCRIPT_DATA_DOUBLE_ESCAPE_START = 64;
const STATE_SCRIPT_DATA_DOUBLE_ESCAPED = 65;
const STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 66;
const STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 67;
const STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 68;
const STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END = 69;

// PLAINTEXT state (selected for `<plaintext>` by `getContentModeForTag`).
const STATE_PLAINTEXT = 70;

// Character reference states.
// https://html.spec.whatwg.org/multipage/parsing.html#character-reference-state
const STATE_CHARACTER_REFERENCE = 71;
// https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
const STATE_NAMED_CHARACTER_REFERENCE = 72;
// https://html.spec.whatwg.org/multipage/parsing.html#ambiguous-ampersand-state
const STATE_AMBIGUOUS_AMPERSAND = 73;
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-state
const STATE_NUMERIC_CHARACTER_REFERENCE = 74;
// https://html.spec.whatwg.org/multipage/parsing.html#hexadecimal-character-reference-start-state
const STATE_HEXADECIMAL_CHARACTER_REFERENCE_START = 75;
// https://html.spec.whatwg.org/multipage/parsing.html#decimal-character-reference-start-state
const STATE_DECIMAL_CHARACTER_REFERENCE_START = 76;
// https://html.spec.whatwg.org/multipage/parsing.html#hexadecimal-character-reference-state
const STATE_HEXADECIMAL_CHARACTER_REFERENCE = 77;
// https://html.spec.whatwg.org/multipage/parsing.html#decimal-character-reference-state
const STATE_DECIMAL_CHARACTER_REFERENCE = 78;
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
const STATE_NUMERIC_CHARACTER_REFERENCE_END = 79;
|
|
126
|
+
|
|
127
|
+
// Character codes that the tokenizer compares against the value returned by
// `input.charCodeAt(pos)`.
const CC_TAB = 0x09; // U+0009 CHARACTER TABULATION
const CC_LF = 0x0a; // U+000A LINE FEED
const CC_FF = 0x0c; // U+000C FORM FEED
const CC_SPACE = 0x20; // U+0020 SPACE
const CC_EXCLAMATION_MARK = 0x21; // !
const CC_QUOTATION_MARK = 0x22; // "
const CC_NUMBER_SIGN = 0x23; // #
const CC_AMPERSAND = 0x26; // &
const CC_APOSTROPHE = 0x27; // '
const CC_HYPHEN_MINUS = 0x2d; // -
const CC_SOLIDUS = 0x2f; // /
const CC_SEMICOLON = 0x3b; // ;
const CC_LESS_THAN = 0x3c; // <
const CC_EQUALS = 0x3d; // =
const CC_GREATER_THAN = 0x3e; // >
const CC_QUESTION_MARK = 0x3f; // ?
const CC_LEFT_SQUARE_BRACKET = 0x5b; // [
const CC_RIGHT_SQUARE_BRACKET = 0x5d; // ]
|
|
145
|
+
|
|
146
|
+
// Quote kinds tracked in `attrQuoteType` and handed to the `attribute`
// callback as its `quoteType` argument.
// The value was opened with `"`.
const QUOTE_DOUBLE = 1;
// The value was opened with `'`.
const QUOTE_SINGLE = 2;
// Unquoted value; also the initial / reset value of `attrQuoteType`.
const QUOTE_NONE = 0;
|
|
149
|
+
|
|
150
|
+
// Length cap for a named character reference, in characters.
// The longest WHATWG named entity name *including* the trailing `;` is
// 32 characters (`CounterClockwiseContourIntegral;`); without the trailing
// `;` it is 31.
// Used to cap both the tokenizer's named-character-reference run length and
// the decoder's longest-prefix backtrack, so that pathological inputs (e.g.
// `&` followed by thousands of alphanumerics) stay linear-time.
const MAX_ENTITY_NAME_LEN = 32;
|
|
156
|
+
|
|
157
|
+
/**
 * Tests whether a character code is an ASCII letter (`A`-`Z` or `a`-`z`).
 * @param {number} cc character code
 * @returns {boolean} true when `cc` is an ASCII upper- or lowercase letter
 */
const isAsciiAlpha = (cc) => {
	// Lowercase range U+0061..U+007A.
	if (cc >= 0x61 && cc <= 0x7a) return true;
	// Uppercase range U+0041..U+005A.
	return cc >= 0x41 && cc <= 0x5a;
};
|
|
163
|
+
|
|
164
|
+
/**
 * Tests whether a character code is an ASCII letter or an ASCII digit.
 * @param {number} cc character code
 * @returns {boolean} true when `cc` is in `A`-`Z`, `a`-`z` or `0`-`9`
 */
const isAsciiAlphanumeric = (cc) => {
	if (isAsciiAlpha(cc)) return true;
	// Digit range U+0030..U+0039.
	return cc >= 0x30 && cc <= 0x39;
};
|
|
170
|
+
|
|
171
|
+
/**
 * Tests whether a character code is an ASCII decimal digit (`0`-`9`).
 * @param {number} cc character code
 * @returns {boolean} true when `cc` is in U+0030..U+0039
 */
const isAsciiDigit = (cc) => {
	if (cc < 0x30) return false;
	return cc <= 0x39;
};
|
|
176
|
+
|
|
177
|
+
/**
 * Tests whether a character code is an ASCII hexadecimal digit.
 * @param {number} cc character code
 * @returns {boolean} true when `cc` is in `0`-`9`, `A`-`F` or `a`-`f`
 */
const isAsciiHexDigit = (cc) => {
	// 0-9
	if (cc >= 0x30 && cc <= 0x39) return true;
	// A-F
	if (cc >= 0x41 && cc <= 0x46) return true;
	// a-f
	return cc >= 0x61 && cc <= 0x66;
};
|
|
185
|
+
|
|
186
|
+
/**
 * Tests whether a character code is whitespace for the tokenizer: TAB, LF,
 * FF, CR or SPACE.
 *
 * The WHATWG tokenizer states list only TAB, LF, FF and SPACE because the
 * spec's input preprocessing turns every CR / CRLF into LF before
 * tokenization. Token offsets here index into the original string, so CR
 * is accepted directly instead: otherwise the CR of a CRLF line ending
 * would be consumed as part of a tag name or as a bogus attribute name.
 * NOTE(review): assumes `input` may still contain CR; if newlines are
 * already normalized upstream, the CR check is simply never hit.
 * @param {number} cc character code
 * @returns {boolean} is space
 */
const isSpace = (cc) =>
	cc === CC_TAB ||
	cc === CC_LF ||
	cc === CC_FF ||
	// U+000D CARRIAGE RETURN (no `CC_*` constant exists for it).
	cc === 0x0d ||
	cc === CC_SPACE;
|
|
192
|
+
|
|
193
|
+
/**
|
|
194
|
+
* Severity of a tokenizer-detected parse error. `"warning"` is recoverable
|
|
195
|
+
* (the tokenizer continued and the emitted token is still well-formed, e.g.
|
|
196
|
+
* missing-attribute-value); `"error"` means the emitted token's offset
|
|
197
|
+
* range is incomplete or does not match what the spec would produce, e.g.
|
|
198
|
+
* eof-in-tag.
|
|
199
|
+
*
|
|
200
|
+
* Token offsets are JS string indices (UTF-16 code-unit offsets into
|
|
201
|
+
* `input`), not byte offsets — relevant for inputs containing non-BMP
|
|
202
|
+
* code points where one code point spans two indices.
|
|
203
|
+
* @typedef {"warning" | "error"} ParseErrorSeverity
|
|
204
|
+
*/
|
|
205
|
+
|
|
206
|
+
/**
|
|
207
|
+
* @typedef {object} HtmlTokenCallbacks
|
|
208
|
+
* @property {(input: string, start: number, end: number, nameStart: number, nameEnd: number, selfClosing: boolean) => number=} openTag
|
|
209
|
+
* @property {(input: string, start: number, end: number, nameStart: number, nameEnd: number) => number=} closeTag
|
|
210
|
+
* @property {(input: string, start: number, end: number) => number=} text
|
|
211
|
+
* @property {(input: string, nameStart: number, nameEnd: number, valueStart: number, valueEnd: number, quoteType: number) => number=} attribute
|
|
212
|
+
* @property {(input: string, start: number, end: number) => number=} comment
|
|
213
|
+
* @property {(input: string, start: number, end: number) => number=} doctype
|
|
214
|
+
* @property {(input: string, code: string, start: number, end: number, severity: ParseErrorSeverity) => void=} parseError
|
|
215
|
+
*/
|
|
216
|
+
|
|
217
|
+
/**
|
|
218
|
+
* @param {string} input input string
|
|
219
|
+
* @param {number} pos current position
|
|
220
|
+
* @param {HtmlTokenCallbacks} callbacks callbacks
|
|
221
|
+
* @returns {number} final position
|
|
222
|
+
*/
|
|
223
|
+
const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
|
|
224
|
+
const len = input.length;
|
|
225
|
+
let state = STATE_DATA;
|
|
226
|
+
let returnState = STATE_DATA;
|
|
227
|
+
|
|
228
|
+
let textStart = pos;
|
|
229
|
+
let tagStart = pos;
|
|
230
|
+
let tagNameStart = -1;
|
|
231
|
+
let tagNameEnd = -1;
|
|
232
|
+
let attrNameStart = -1;
|
|
233
|
+
let attrNameEnd = -1;
|
|
234
|
+
let attrValueStart = -1;
|
|
235
|
+
let attrQuoteType = QUOTE_NONE;
|
|
236
|
+
let commentStart = pos;
|
|
237
|
+
let lastOpenTagName = "";
|
|
238
|
+
// Counter used by SCRIPT_DATA_DOUBLE_ESCAPE_{START,END} to detect whether
|
|
239
|
+
// the ASCII-alpha run after `<` / `</` spells exactly `"script"`. Values
|
|
240
|
+
// 0..6 = number of chars matched so far; 7 = no longer matches (sentinel).
|
|
241
|
+
// Avoids growing a buffer for pathological inputs with long alpha runs.
|
|
242
|
+
let scriptMatch = 0;
|
|
243
|
+
let namedEntityConsumed = 0;
|
|
244
|
+
// Tracks whether the current tag has parsed any attributes — used to
|
|
245
|
+
// fire the `end-tag-with-attributes` parse error when an end tag emits.
|
|
246
|
+
let tagHasAttributes = false;
|
|
247
|
+
|
|
248
|
+
/**
 * Forwards a tokenizer parse error to `callbacks.parseError`, passing
 * `input` as the first argument. Does nothing when no `parseError`
 * callback was supplied. Callers pass `"error"` when the emitted token
 * is incomplete (e.g. EOF inside a tag or comment) and `"warning"`
 * otherwise. Offsets are JS string indices (UTF-16 code-unit offsets
 * into `input`).
 * @param {string} code WHATWG parse-error code (kebab-case)
 * @param {number} start string offset where the error starts
 * @param {number} end string offset where the error ends
 * @param {ParseErrorSeverity} severity error severity
 */
const reportError = (code, start, end, severity) => {
	if (callbacks.parseError === undefined) return;
	callbacks.parseError(input, code, start, end, severity);
};
|
|
264
|
+
|
|
265
|
+
/**
 * Tests whether a character code is an ASCII lowercase letter (`a`-`z`).
 * @param {number} cc character code
 * @returns {boolean} true when `cc` is in U+0061..U+007A
 */
const isAsciiLowerAlpha = (cc) => {
	if (cc < 0x61) return false;
	return cc <= 0x7a;
};
|
|
270
|
+
|
|
271
|
+
/**
 * Tests whether a character code is an ASCII uppercase letter (`A`-`Z`).
 * @param {number} cc character code
 * @returns {boolean} true when `cc` is in U+0041..U+005A
 */
const isAsciiUpperAlpha = (cc) => {
	if (cc < 0x41) return false;
	return cc <= 0x5a;
};
|
|
276
|
+
|
|
277
|
+
/**
 * Maps a tag name to the tokenizer state used for the content that
 * follows its start tag. Names are compared exactly, so the caller must
 * pass the name already lowercased.
 * @param {string} name tag name (lowercase)
 * @returns {number} content mode state for this tag, or STATE_DATA
 */
const getContentModeForTag = (name) => {
	if (name === "textarea" || name === "title") {
		return STATE_RCDATA;
	}
	if (
		name === "style" ||
		name === "xmp" ||
		name === "iframe" ||
		name === "noembed" ||
		name === "noframes"
	) {
		return STATE_RAWTEXT;
	}
	if (name === "script") {
		return STATE_SCRIPT_DATA;
	}
	if (name === "plaintext") {
		return STATE_PLAINTEXT;
	}
	// Every other tag keeps the tokenizer in the data state.
	return STATE_DATA;
};
|
|
300
|
+
|
|
301
|
+
/**
 * Emits the pending text span `[textStart, endPos)` through
 * `callbacks.text` (when provided) and marks it as consumed. An empty
 * span is ignored.
 * @param {number} endPos end position
 */
const flushText = (endPos) => {
	// Nothing pending.
	if (!(textStart < endPos)) return;
	if (callbacks.text !== undefined) {
		callbacks.text(input, textStart, endPos);
	}
	// Move `textStart` forward even without a callback, so that flushing
	// the same span again (e.g. from the EOF handler after a tag-open
	// transition already flushed it) does nothing instead of emitting a
	// duplicate. emitOpenTag / emitCloseTag set `textStart` to their own
	// `nextPos` afterwards, so their start is unaffected.
	textStart = endPos;
};
|
|
317
|
+
|
|
318
|
+
/**
 * Reports the attribute currently being tracked (if any) through
 * `callbacks.attribute`, then resets the attribute tracking state.
 * @param {number} endPos end position
 * @returns {number} next position
 */
const emitAttribute = (endPos) => {
	const hasAttribute = attrNameStart !== -1;

	// Without an `attribute` callback the tokenizer continues at `endPos`,
	// or just past the closing quote when the value was quoted. When a
	// callback IS provided its return value is used instead, so it is
	// expected to do the same advance based on the reported `quoteType`.
	let nextPos = endPos;
	if (attrQuoteType !== QUOTE_NONE) {
		nextPos = endPos + 1;
	}

	if (callbacks.attribute !== undefined && hasAttribute) {
		// An attribute without a value reports -1 for both value offsets.
		const valueEnd = attrValueStart === -1 ? -1 : endPos;
		nextPos = callbacks.attribute(
			input,
			attrNameStart,
			attrNameEnd,
			attrValueStart,
			valueEnd,
			attrQuoteType
		);
	}

	if (hasAttribute) {
		tagHasAttributes = true;
	}

	attrQuoteType = QUOTE_NONE;
	attrValueStart = -1;
	attrNameStart = -1;
	return nextPos;
};
|
|
345
|
+
|
|
346
|
+
/**
 * Emits the current start tag through `callbacks.openTag` (when
 * provided) and resets the per-tag state.
 * @param {number} endPos end position
 * @param {boolean} selfClosing is self closing
 * @returns {number} next position
 */
const emitOpenTag = (endPos, selfClosing) => {
	// Without a callback the tokenizer simply continues at `endPos`.
	const nextPos =
		callbacks.openTag === undefined
			? endPos
			: callbacks.openTag(
					input,
					tagStart,
					endPos,
					tagNameStart,
					tagNameEnd,
					selfClosing
				);

	// Only a non-self-closing tag updates the remembered (lowercased) tag
	// name.
	if (!selfClosing) {
		const rawName = input.slice(tagNameStart, tagNameEnd);
		lastOpenTagName = rawName.toLowerCase();
	}

	textStart = nextPos;
	tagHasAttributes = false;
	return nextPos;
};
|
|
370
|
+
|
|
371
|
+
/**
 * Emits the current end tag through `callbacks.closeTag` (when
 * provided) and resets the per-tag state.
 * @param {number} endPos end position
 * @returns {number} next position
 */
const emitCloseTag = (endPos) => {
	// Per WHATWG, an end tag that carries attributes is a parse error; it
	// is reported before the tag itself is emitted.
	if (tagHasAttributes) {
		reportError("end-tag-with-attributes", tagStart, endPos, "warning");
	}

	// Without a callback the tokenizer simply continues at `endPos`.
	const nextPos =
		callbacks.closeTag === undefined
			? endPos
			: callbacks.closeTag(input, tagStart, endPos, tagNameStart, tagNameEnd);

	textStart = nextPos;
	tagHasAttributes = false;
	return nextPos;
};
|
|
394
|
+
|
|
395
|
+
while (pos < len) {
|
|
396
|
+
const cc = input.charCodeAt(pos);
|
|
397
|
+
|
|
398
|
+
// TODO: Not every tokenizer state is handled here yet; the remaining
// ones still need to be implemented. Once the whole tokenizer has been
// moved over, this TODO can be removed.
|
|
400
|
+
switch (state) {
|
|
401
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#data-state
|
|
402
|
+
case STATE_DATA:
|
|
403
|
+
// Consume the next input character:
|
|
404
|
+
// U+003C LESS-THAN SIGN (<)
|
|
405
|
+
// Set the return state to the data state. Switch to the tag open state.
|
|
406
|
+
if (cc === CC_LESS_THAN) {
|
|
407
|
+
tagStart = pos;
|
|
408
|
+
state = STATE_TAG_OPEN;
|
|
409
|
+
pos++;
|
|
410
|
+
} else if (cc === CC_AMPERSAND) {
|
|
411
|
+
// U+0026 AMPERSAND (&)
|
|
412
|
+
// Set the return state to the data state. Switch to the
|
|
413
|
+
// character reference state.
|
|
414
|
+
returnState = STATE_DATA;
|
|
415
|
+
state = STATE_CHARACTER_REFERENCE;
|
|
416
|
+
pos++;
|
|
417
|
+
} else {
|
|
418
|
+
pos++;
|
|
419
|
+
}
|
|
420
|
+
break;
|
|
421
|
+
|
|
422
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
|
|
423
|
+
case STATE_TAG_OPEN:
|
|
424
|
+
// Consume the next input character:
|
|
425
|
+
// U+002F SOLIDUS (/)
|
|
426
|
+
// Switch to the end tag open state.
|
|
427
|
+
if (cc === CC_SOLIDUS) {
|
|
428
|
+
state = STATE_END_TAG_OPEN;
|
|
429
|
+
pos++;
|
|
430
|
+
} else if (cc === CC_EXCLAMATION_MARK) {
|
|
431
|
+
// U+0021 EXCLAMATION MARK (!)
|
|
432
|
+
// Switch to the markup declaration open state.
|
|
433
|
+
flushText(tagStart);
|
|
434
|
+
commentStart = tagStart;
|
|
435
|
+
state = STATE_MARKUP_DECLARATION_OPEN;
|
|
436
|
+
pos++;
|
|
437
|
+
} else if (isAsciiAlpha(cc)) {
|
|
438
|
+
// ASCII alpha
|
|
439
|
+
// Create a new start tag token, set its tag name to the empty string.
|
|
440
|
+
// Reconsume in the tag name state.
|
|
441
|
+
flushText(tagStart);
|
|
442
|
+
tagNameStart = pos;
|
|
443
|
+
state = STATE_TAG_NAME;
|
|
444
|
+
// Reconsume
|
|
445
|
+
} else if (cc === CC_QUESTION_MARK) {
|
|
446
|
+
// U+003F QUESTION MARK (?)
|
|
447
|
+
// This is an unexpected-question-mark-instead-of-tag-name parse error.
|
|
448
|
+
// Create a comment token whose data is the empty string. Reconsume in the
|
|
449
|
+
// bogus comment state.
|
|
450
|
+
reportError(
|
|
451
|
+
"unexpected-question-mark-instead-of-tag-name",
|
|
452
|
+
pos,
|
|
453
|
+
pos + 1,
|
|
454
|
+
"warning"
|
|
455
|
+
);
|
|
456
|
+
flushText(tagStart);
|
|
457
|
+
commentStart = tagStart;
|
|
458
|
+
state = STATE_BOGUS_COMMENT;
|
|
459
|
+
// Reconsume — let the bogus-comment state consume the `?`
|
|
460
|
+
// itself, matching the spec.
|
|
461
|
+
} else {
|
|
462
|
+
// Anything else
|
|
463
|
+
// This is an invalid-first-character-of-tag-name parse error. Emit a U+003C
|
|
464
|
+
// LESS-THAN SIGN character token. Reconsume in the data state.
|
|
465
|
+
reportError(
|
|
466
|
+
"invalid-first-character-of-tag-name",
|
|
467
|
+
pos,
|
|
468
|
+
pos + 1,
|
|
469
|
+
"warning"
|
|
470
|
+
);
|
|
471
|
+
state = STATE_DATA;
|
|
472
|
+
// Reconsume
|
|
473
|
+
}
|
|
474
|
+
break;
|
|
475
|
+
|
|
476
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#end-tag-open-state
|
|
477
|
+
case STATE_END_TAG_OPEN:
|
|
478
|
+
// Consume the next input character:
|
|
479
|
+
// ASCII alpha
|
|
480
|
+
// Create a new end tag token, set its tag name to the empty string.
|
|
481
|
+
// Reconsume in the tag name state.
|
|
482
|
+
if (isAsciiAlpha(cc)) {
|
|
483
|
+
flushText(tagStart);
|
|
484
|
+
tagNameStart = pos;
|
|
485
|
+
state = STATE_TAG_NAME;
|
|
486
|
+
// Reconsume
|
|
487
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
488
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
489
|
+
// This is a missing-end-tag-name parse error. Switch to the data state.
|
|
490
|
+
reportError("missing-end-tag-name", pos, pos + 1, "warning");
|
|
491
|
+
state = STATE_DATA;
|
|
492
|
+
pos++;
|
|
493
|
+
} else {
|
|
494
|
+
// Anything else
|
|
495
|
+
// This is an invalid-first-character-of-tag-name parse error. Create a
|
|
496
|
+
// comment token whose data is the empty string. Reconsume in the bogus
|
|
497
|
+
// comment state.
|
|
498
|
+
reportError(
|
|
499
|
+
"invalid-first-character-of-tag-name",
|
|
500
|
+
pos,
|
|
501
|
+
pos + 1,
|
|
502
|
+
"warning"
|
|
503
|
+
);
|
|
504
|
+
flushText(tagStart);
|
|
505
|
+
commentStart = tagStart;
|
|
506
|
+
state = STATE_BOGUS_COMMENT;
|
|
507
|
+
// Reconsume — let bogus-comment consume this char itself.
|
|
508
|
+
}
|
|
509
|
+
break;
|
|
510
|
+
|
|
511
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state
|
|
512
|
+
case STATE_TAG_NAME:
|
|
513
|
+
// Consume the next input character:
|
|
514
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
515
|
+
// U+000A LINE FEED (LF)
|
|
516
|
+
// U+000C FORM FEED (FF)
|
|
517
|
+
// U+0020 SPACE
|
|
518
|
+
// Switch to the before attribute name state.
|
|
519
|
+
if (isSpace(cc)) {
|
|
520
|
+
tagNameEnd = pos;
|
|
521
|
+
state = STATE_BEFORE_ATTRIBUTE_NAME;
|
|
522
|
+
pos++;
|
|
523
|
+
} else if (cc === CC_SOLIDUS) {
|
|
524
|
+
// U+002F SOLIDUS (/)
|
|
525
|
+
// Switch to the self-closing start tag state.
|
|
526
|
+
tagNameEnd = pos;
|
|
527
|
+
state = STATE_SELF_CLOSING_START_TAG;
|
|
528
|
+
pos++;
|
|
529
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
530
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
531
|
+
// Switch to the data state. Emit the current tag token.
|
|
532
|
+
tagNameEnd = pos;
|
|
533
|
+
if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
|
|
534
|
+
state = STATE_DATA;
|
|
535
|
+
pos = emitCloseTag(pos + 1);
|
|
536
|
+
} else {
|
|
537
|
+
const nextPos = emitOpenTag(pos + 1, false);
|
|
538
|
+
state =
|
|
539
|
+
nextPos > pos + 1
|
|
540
|
+
? STATE_DATA
|
|
541
|
+
: getContentModeForTag(lastOpenTagName);
|
|
542
|
+
pos = nextPos;
|
|
543
|
+
}
|
|
544
|
+
} else {
|
|
545
|
+
pos++;
|
|
546
|
+
}
|
|
547
|
+
break;
|
|
548
|
+
|
|
549
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
|
|
550
|
+
case STATE_BEFORE_ATTRIBUTE_NAME:
|
|
551
|
+
// Consume the next input character:
|
|
552
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
553
|
+
// U+000A LINE FEED (LF)
|
|
554
|
+
// U+000C FORM FEED (FF)
|
|
555
|
+
// U+0020 SPACE
|
|
556
|
+
// Ignore the character.
|
|
557
|
+
// (Whitespace is consumed and skipped here; nothing is reconsumed.)
|
|
558
|
+
if (isSpace(cc)) {
|
|
559
|
+
pos++;
|
|
560
|
+
} else if (cc === CC_SOLIDUS || cc === CC_GREATER_THAN) {
|
|
561
|
+
// U+002F SOLIDUS (/)
|
|
562
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
563
|
+
// EOF
|
|
564
|
+
// Reconsume in the after attribute name state.
|
|
565
|
+
state = STATE_AFTER_ATTRIBUTE_NAME;
|
|
566
|
+
// Reconsume
|
|
567
|
+
} else if (cc === CC_EQUALS) {
|
|
568
|
+
// U+003D EQUALS SIGN (=)
|
|
569
|
+
// This is an unexpected-equals-sign-before-attribute-name parse
|
|
570
|
+
// error. Start a new attribute. Switch to the attribute name state.
|
|
571
|
+
reportError(
|
|
572
|
+
"unexpected-equals-sign-before-attribute-name",
|
|
573
|
+
pos,
|
|
574
|
+
pos + 1,
|
|
575
|
+
"warning"
|
|
576
|
+
);
|
|
577
|
+
attrNameStart = pos;
|
|
578
|
+
state = STATE_ATTRIBUTE_NAME;
|
|
579
|
+
pos++;
|
|
580
|
+
} else {
|
|
581
|
+
// Anything else
|
|
582
|
+
// Start a new attribute in the current tag token. Set that attribute name
|
|
583
|
+
// and value to the empty string. Reconsume in the attribute name state.
|
|
584
|
+
attrNameStart = pos;
|
|
585
|
+
state = STATE_ATTRIBUTE_NAME;
|
|
586
|
+
// Reconsume
|
|
587
|
+
}
|
|
588
|
+
break;
|
|
589
|
+
|
|
590
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
|
|
591
|
+
case STATE_ATTRIBUTE_NAME:
|
|
592
|
+
// Consume the next input character:
|
|
593
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
594
|
+
// U+000A LINE FEED (LF)
|
|
595
|
+
// U+000C FORM FEED (FF)
|
|
596
|
+
// U+0020 SPACE
|
|
597
|
+
// U+002F SOLIDUS (/)
|
|
598
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
599
|
+
// EOF
|
|
600
|
+
// Reconsume in the after attribute name state.
|
|
601
|
+
if (isSpace(cc) || cc === CC_SOLIDUS || cc === CC_GREATER_THAN) {
|
|
602
|
+
attrNameEnd = pos;
|
|
603
|
+
state = STATE_AFTER_ATTRIBUTE_NAME;
|
|
604
|
+
// Reconsume
|
|
605
|
+
} else if (cc === CC_EQUALS) {
|
|
606
|
+
attrNameEnd = pos;
|
|
607
|
+
state = STATE_BEFORE_ATTRIBUTE_VALUE;
|
|
608
|
+
pos++;
|
|
609
|
+
} else {
|
|
610
|
+
pos++;
|
|
611
|
+
}
|
|
612
|
+
break;
|
|
613
|
+
|
|
614
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-name-state
|
|
615
|
+
case STATE_AFTER_ATTRIBUTE_NAME:
|
|
616
|
+
// Consume the next input character:
|
|
617
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
618
|
+
// U+000A LINE FEED (LF)
|
|
619
|
+
// U+000C FORM FEED (FF)
|
|
620
|
+
// U+0020 SPACE
|
|
621
|
+
// Ignore the character.
|
|
622
|
+
if (isSpace(cc)) {
|
|
623
|
+
pos++;
|
|
624
|
+
} else if (cc === CC_SOLIDUS) {
|
|
625
|
+
// U+002F SOLIDUS (/)
|
|
626
|
+
// Switch to the self-closing start tag state.
|
|
627
|
+
emitAttribute(pos);
|
|
628
|
+
state = STATE_SELF_CLOSING_START_TAG;
|
|
629
|
+
pos++;
|
|
630
|
+
} else if (cc === CC_EQUALS) {
|
|
631
|
+
// U+003D EQUALS SIGN (=)
|
|
632
|
+
// Switch to the before attribute value state.
|
|
633
|
+
state = STATE_BEFORE_ATTRIBUTE_VALUE;
|
|
634
|
+
pos++;
|
|
635
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
636
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
637
|
+
// Switch to the data state. Emit the current tag token.
|
|
638
|
+
emitAttribute(pos);
|
|
639
|
+
if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
|
|
640
|
+
state = STATE_DATA;
|
|
641
|
+
pos = emitCloseTag(pos + 1);
|
|
642
|
+
} else {
|
|
643
|
+
const nextPos = emitOpenTag(pos + 1, false);
|
|
644
|
+
state =
|
|
645
|
+
nextPos > pos + 1
|
|
646
|
+
? STATE_DATA
|
|
647
|
+
: getContentModeForTag(lastOpenTagName);
|
|
648
|
+
pos = nextPos;
|
|
649
|
+
}
|
|
650
|
+
} else {
|
|
651
|
+
// Anything else
|
|
652
|
+
// Start a new attribute in the current tag token.
|
|
653
|
+
emitAttribute(pos);
|
|
654
|
+
attrNameStart = pos;
|
|
655
|
+
state = STATE_ATTRIBUTE_NAME;
|
|
656
|
+
// Reconsume
|
|
657
|
+
}
|
|
658
|
+
break;
|
|
659
|
+
|
|
660
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-value-state
|
|
661
|
+
case STATE_BEFORE_ATTRIBUTE_VALUE:
|
|
662
|
+
// Consume the next input character:
|
|
663
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
664
|
+
// U+000A LINE FEED (LF)
|
|
665
|
+
// U+000C FORM FEED (FF)
|
|
666
|
+
// U+0020 SPACE
|
|
667
|
+
// Ignore the character.
|
|
668
|
+
if (isSpace(cc)) {
|
|
669
|
+
pos++;
|
|
670
|
+
} else if (cc === CC_QUOTATION_MARK) {
|
|
671
|
+
// U+0022 QUOTATION MARK (")
|
|
672
|
+
// Switch to the attribute value (double-quoted) state.
|
|
673
|
+
attrValueStart = pos + 1;
|
|
674
|
+
attrQuoteType = QUOTE_DOUBLE;
|
|
675
|
+
state = STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED;
|
|
676
|
+
pos++;
|
|
677
|
+
} else if (cc === CC_APOSTROPHE) {
|
|
678
|
+
// U+0027 APOSTROPHE (')
|
|
679
|
+
// Switch to the attribute value (single-quoted) state.
|
|
680
|
+
attrValueStart = pos + 1;
|
|
681
|
+
attrQuoteType = QUOTE_SINGLE;
|
|
682
|
+
state = STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED;
|
|
683
|
+
pos++;
|
|
684
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
685
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
686
|
+
// This is a missing-attribute-value parse error. Switch to the data
|
|
687
|
+
// state. Emit the current tag token. The attribute is reported with
|
|
688
|
+
// an empty value range pointing at the `>` so the open-tag offset range
|
|
689
|
+
// still includes the `>`.
|
|
690
|
+
reportError("missing-attribute-value", pos, pos + 1, "warning");
|
|
691
|
+
attrValueStart = pos;
|
|
692
|
+
attrQuoteType = QUOTE_NONE;
|
|
693
|
+
pos = emitAttribute(pos);
|
|
694
|
+
if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
|
|
695
|
+
state = STATE_DATA;
|
|
696
|
+
pos = emitCloseTag(pos + 1);
|
|
697
|
+
} else {
|
|
698
|
+
const nextPos = emitOpenTag(pos + 1, false);
|
|
699
|
+
state =
|
|
700
|
+
nextPos > pos + 1
|
|
701
|
+
? STATE_DATA
|
|
702
|
+
: getContentModeForTag(lastOpenTagName);
|
|
703
|
+
pos = nextPos;
|
|
704
|
+
}
|
|
705
|
+
} else {
|
|
706
|
+
// Anything else
|
|
707
|
+
// Reconsume in the attribute value (unquoted) state.
|
|
708
|
+
attrValueStart = pos;
|
|
709
|
+
attrQuoteType = QUOTE_NONE;
|
|
710
|
+
state = STATE_ATTRIBUTE_VALUE_UNQUOTED;
|
|
711
|
+
// Reconsume
|
|
712
|
+
}
|
|
713
|
+
break;
|
|
714
|
+
|
|
715
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(double-quoted)-state
|
|
716
|
+
case STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED:
|
|
717
|
+
// Consume the next input character:
|
|
718
|
+
// U+0022 QUOTATION MARK (")
|
|
719
|
+
// Switch to the after attribute value (quoted) state.
|
|
720
|
+
if (cc === CC_QUOTATION_MARK) {
|
|
721
|
+
pos = emitAttribute(pos);
|
|
722
|
+
state = STATE_AFTER_ATTRIBUTE_VALUE_QUOTED;
|
|
723
|
+
} else if (cc === CC_AMPERSAND) {
|
|
724
|
+
// U+0026 AMPERSAND (&)
|
|
725
|
+
// Set the return state to the attribute value (double-quoted)
|
|
726
|
+
// state. Switch to the character reference state.
|
|
727
|
+
returnState = STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED;
|
|
728
|
+
state = STATE_CHARACTER_REFERENCE;
|
|
729
|
+
pos++;
|
|
730
|
+
} else {
|
|
731
|
+
pos++;
|
|
732
|
+
}
|
|
733
|
+
break;
|
|
734
|
+
|
|
735
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(single-quoted)-state
|
|
736
|
+
case STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED:
|
|
737
|
+
// Consume the next input character:
|
|
738
|
+
// U+0027 APOSTROPHE (')
|
|
739
|
+
// Switch to the after attribute value (quoted) state.
|
|
740
|
+
if (cc === CC_APOSTROPHE) {
|
|
741
|
+
pos = emitAttribute(pos);
|
|
742
|
+
state = STATE_AFTER_ATTRIBUTE_VALUE_QUOTED;
|
|
743
|
+
} else if (cc === CC_AMPERSAND) {
|
|
744
|
+
// U+0026 AMPERSAND (&)
|
|
745
|
+
// Set the return state to the attribute value (single-quoted)
|
|
746
|
+
// state. Switch to the character reference state.
|
|
747
|
+
returnState = STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED;
|
|
748
|
+
state = STATE_CHARACTER_REFERENCE;
|
|
749
|
+
pos++;
|
|
750
|
+
} else {
|
|
751
|
+
pos++;
|
|
752
|
+
}
|
|
753
|
+
break;
|
|
754
|
+
|
|
755
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(unquoted)-state
|
|
756
|
+
case STATE_ATTRIBUTE_VALUE_UNQUOTED:
|
|
757
|
+
if (isSpace(cc)) {
|
|
758
|
+
pos = emitAttribute(pos);
|
|
759
|
+
state = STATE_BEFORE_ATTRIBUTE_NAME;
|
|
760
|
+
// Reconsume so space is handled in BEFORE_ATTRIBUTE_NAME
|
|
761
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
762
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
763
|
+
// Switch to the data state.
|
|
764
|
+
// Emit the current tag token.
|
|
765
|
+
pos = emitAttribute(pos);
|
|
766
|
+
if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
|
|
767
|
+
state = STATE_DATA;
|
|
768
|
+
pos = emitCloseTag(pos + 1);
|
|
769
|
+
} else {
|
|
770
|
+
const nextPos = emitOpenTag(pos + 1, false);
|
|
771
|
+
state =
|
|
772
|
+
nextPos > pos + 1
|
|
773
|
+
? STATE_DATA
|
|
774
|
+
: getContentModeForTag(lastOpenTagName);
|
|
775
|
+
pos = nextPos;
|
|
776
|
+
}
|
|
777
|
+
} else if (cc === CC_AMPERSAND) {
|
|
778
|
+
// U+0026 AMPERSAND (&)
|
|
779
|
+
// Set the return state to the attribute value (unquoted)
|
|
780
|
+
// state. Switch to the character reference state.
|
|
781
|
+
returnState = STATE_ATTRIBUTE_VALUE_UNQUOTED;
|
|
782
|
+
state = STATE_CHARACTER_REFERENCE;
|
|
783
|
+
pos++;
|
|
784
|
+
} else {
|
|
785
|
+
pos++;
|
|
786
|
+
}
|
|
787
|
+
break;
|
|
788
|
+
|
|
789
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-value-(quoted)-state
|
|
790
|
+
case STATE_AFTER_ATTRIBUTE_VALUE_QUOTED:
|
|
791
|
+
// Consume the next input character:
|
|
792
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
793
|
+
// U+000A LINE FEED (LF)
|
|
794
|
+
// U+000C FORM FEED (FF)
|
|
795
|
+
// U+0020 SPACE
|
|
796
|
+
// Switch to the before attribute name state.
|
|
797
|
+
if (isSpace(cc)) {
|
|
798
|
+
state = STATE_BEFORE_ATTRIBUTE_NAME;
|
|
799
|
+
pos++;
|
|
800
|
+
} else if (cc === CC_SOLIDUS) {
|
|
801
|
+
// U+002F SOLIDUS (/)
|
|
802
|
+
// Switch to the self-closing start tag state.
|
|
803
|
+
state = STATE_SELF_CLOSING_START_TAG;
|
|
804
|
+
pos++;
|
|
805
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
806
|
+
if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
|
|
807
|
+
state = STATE_DATA;
|
|
808
|
+
pos = emitCloseTag(pos + 1);
|
|
809
|
+
} else {
|
|
810
|
+
const nextPos = emitOpenTag(pos + 1, false);
|
|
811
|
+
state =
|
|
812
|
+
nextPos > pos + 1
|
|
813
|
+
? STATE_DATA
|
|
814
|
+
: getContentModeForTag(lastOpenTagName);
|
|
815
|
+
pos = nextPos;
|
|
816
|
+
}
|
|
817
|
+
} else {
|
|
818
|
+
// Anything else
|
|
819
|
+
// This is a missing-whitespace-between-attributes parse error. Reconsume in
|
|
820
|
+
// the before attribute name state.
|
|
821
|
+
reportError(
|
|
822
|
+
"missing-whitespace-between-attributes",
|
|
823
|
+
pos,
|
|
824
|
+
pos + 1,
|
|
825
|
+
"warning"
|
|
826
|
+
);
|
|
827
|
+
state = STATE_BEFORE_ATTRIBUTE_NAME;
|
|
828
|
+
// Reconsume
|
|
829
|
+
}
|
|
830
|
+
break;
|
|
831
|
+
|
|
832
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#self-closing-start-tag-state
|
|
833
|
+
case STATE_SELF_CLOSING_START_TAG:
|
|
834
|
+
// Consume the next input character:
|
|
835
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
836
|
+
// Set the self-closing flag of the current tag token. Switch to the data
|
|
837
|
+
// state. Emit the current tag token.
|
|
838
|
+
if (cc === CC_GREATER_THAN) {
|
|
839
|
+
if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
|
|
840
|
+
state = STATE_DATA;
|
|
841
|
+
pos = emitCloseTag(pos + 1);
|
|
842
|
+
} else {
|
|
843
|
+
pos = emitOpenTag(pos + 1, true);
|
|
844
|
+
state = STATE_DATA;
|
|
845
|
+
}
|
|
846
|
+
} else {
|
|
847
|
+
// Anything else
|
|
848
|
+
// This is an unexpected-solidus-in-tag parse error. Reconsume in the before
|
|
849
|
+
// attribute name state.
|
|
850
|
+
reportError("unexpected-solidus-in-tag", pos, pos + 1, "warning");
|
|
851
|
+
state = STATE_BEFORE_ATTRIBUTE_NAME;
|
|
852
|
+
// Reconsume
|
|
853
|
+
}
|
|
854
|
+
break;
|
|
855
|
+
|
|
856
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
|
|
857
|
+
case STATE_MARKUP_DECLARATION_OPEN:
|
|
858
|
+
// If the next few characters are:
|
|
859
|
+
// Two U+002D HYPHEN-MINUS characters (-)
|
|
860
|
+
// Consume those two characters, create a comment token whose data
|
|
861
|
+
// is the empty string, and switch to the comment start state.
|
|
862
|
+
if (
|
|
863
|
+
cc === CC_HYPHEN_MINUS &&
|
|
864
|
+
input.charCodeAt(pos + 1) === CC_HYPHEN_MINUS
|
|
865
|
+
) {
|
|
866
|
+
pos += 2;
|
|
867
|
+
commentStart = tagStart;
|
|
868
|
+
state = STATE_COMMENT_START;
|
|
869
|
+
} else if (
|
|
870
|
+
// ASCII case-insensitive match for the word "DOCTYPE"
|
|
871
|
+
// Consume those characters and switch to the DOCTYPE state.
|
|
872
|
+
(cc === 0x44 || cc === 0x64) /* D or d */ &&
|
|
873
|
+
(input.charCodeAt(pos + 1) | 0x20) === 0x6f /* o */ &&
|
|
874
|
+
(input.charCodeAt(pos + 2) | 0x20) === 0x63 /* c */ &&
|
|
875
|
+
(input.charCodeAt(pos + 3) | 0x20) === 0x74 /* t */ &&
|
|
876
|
+
(input.charCodeAt(pos + 4) | 0x20) === 0x79 /* y */ &&
|
|
877
|
+
(input.charCodeAt(pos + 5) | 0x20) === 0x70 /* p */ &&
|
|
878
|
+
(input.charCodeAt(pos + 6) | 0x20) === 0x65 /* e */
|
|
879
|
+
) {
|
|
880
|
+
pos += 7;
|
|
881
|
+
commentStart = tagStart;
|
|
882
|
+
state = STATE_DOCTYPE;
|
|
883
|
+
} else if (
|
|
884
|
+
// The string "[CDATA[" (the five uppercase letters "CDATA" with a
|
|
885
|
+
// U+005B LEFT SQUARE BRACKET character before and after)
|
|
886
|
+
// Consume those characters and switch to the CDATA section state.
|
|
887
|
+
cc === CC_LEFT_SQUARE_BRACKET &&
|
|
888
|
+
input.charCodeAt(pos + 1) === 0x43 /* C */ &&
|
|
889
|
+
input.charCodeAt(pos + 2) === 0x44 /* D */ &&
|
|
890
|
+
input.charCodeAt(pos + 3) === 0x41 /* A */ &&
|
|
891
|
+
input.charCodeAt(pos + 4) === 0x54 /* T */ &&
|
|
892
|
+
input.charCodeAt(pos + 5) === 0x41 /* A */ &&
|
|
893
|
+
input.charCodeAt(pos + 6) === CC_LEFT_SQUARE_BRACKET
|
|
894
|
+
) {
|
|
895
|
+
pos += 7;
|
|
896
|
+
commentStart = tagStart;
|
|
897
|
+
state = STATE_CDATA_SECTION;
|
|
898
|
+
} else {
|
|
899
|
+
// Anything else
|
|
900
|
+
// This is an incorrectly-opened-comment parse error. Create a comment token
|
|
901
|
+
// whose data is the empty string. Switch to the bogus comment state (don't
|
|
902
|
+
// consume anything in the current state).
|
|
903
|
+
reportError("incorrectly-opened-comment", tagStart, pos, "warning");
|
|
904
|
+
commentStart = tagStart;
|
|
905
|
+
state = STATE_BOGUS_COMMENT;
|
|
906
|
+
// Reconsume
|
|
907
|
+
}
|
|
908
|
+
break;
|
|
909
|
+
|
|
910
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state
|
|
911
|
+
case STATE_COMMENT_START:
|
|
912
|
+
// Consume the next input character:
|
|
913
|
+
// U+002D HYPHEN-MINUS (-)
|
|
914
|
+
// Switch to the comment start dash state.
|
|
915
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
916
|
+
state = STATE_COMMENT_START_DASH;
|
|
917
|
+
pos++;
|
|
918
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
919
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
920
|
+
// This is an abrupt-closing-of-empty-comment parse error. Switch to the
|
|
921
|
+
// data state. Emit the current comment token.
|
|
922
|
+
reportError(
|
|
923
|
+
"abrupt-closing-of-empty-comment",
|
|
924
|
+
pos,
|
|
925
|
+
pos + 1,
|
|
926
|
+
"warning"
|
|
927
|
+
);
|
|
928
|
+
let nextPos = pos + 1;
|
|
929
|
+
if (callbacks.comment !== undefined) {
|
|
930
|
+
nextPos = callbacks.comment(input, commentStart, pos + 1);
|
|
931
|
+
}
|
|
932
|
+
state = STATE_DATA;
|
|
933
|
+
textStart = nextPos;
|
|
934
|
+
pos = nextPos;
|
|
935
|
+
} else {
|
|
936
|
+
// Anything else
|
|
937
|
+
// Reconsume in the comment state.
|
|
938
|
+
state = STATE_COMMENT;
|
|
939
|
+
// Reconsume
|
|
940
|
+
}
|
|
941
|
+
break;
|
|
942
|
+
|
|
943
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-start-dash-state
|
|
944
|
+
case STATE_COMMENT_START_DASH:
|
|
945
|
+
// Consume the next input character:
|
|
946
|
+
// U+002D HYPHEN-MINUS (-)
|
|
947
|
+
// Switch to the comment end state.
|
|
948
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
949
|
+
state = STATE_COMMENT_END;
|
|
950
|
+
pos++;
|
|
951
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
952
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
953
|
+
// This is an abrupt-closing-of-empty-comment parse error. Switch to the
|
|
954
|
+
// data state. Emit the current comment token.
|
|
955
|
+
reportError(
|
|
956
|
+
"abrupt-closing-of-empty-comment",
|
|
957
|
+
pos,
|
|
958
|
+
pos + 1,
|
|
959
|
+
"warning"
|
|
960
|
+
);
|
|
961
|
+
let nextPos = pos + 1;
|
|
962
|
+
if (callbacks.comment !== undefined) {
|
|
963
|
+
nextPos = callbacks.comment(input, commentStart, pos + 1);
|
|
964
|
+
}
|
|
965
|
+
state = STATE_DATA;
|
|
966
|
+
textStart = nextPos;
|
|
967
|
+
pos = nextPos;
|
|
968
|
+
} else {
|
|
969
|
+
// Anything else
|
|
970
|
+
// Append a U+002D HYPHEN-MINUS character (-) to the comment token's data.
|
|
971
|
+
// Reconsume in the comment state.
|
|
972
|
+
state = STATE_COMMENT;
|
|
973
|
+
// Reconsume
|
|
974
|
+
}
|
|
975
|
+
break;
|
|
976
|
+
|
|
977
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-state
|
|
978
|
+
case STATE_COMMENT:
|
|
979
|
+
// Consume the next input character:
|
|
980
|
+
// U+003C LESS-THAN SIGN (<)
|
|
981
|
+
// Append a U+003C LESS-THAN SIGN character to the comment token's data. Switch to the comment less-than sign state.
|
|
982
|
+
if (cc === CC_LESS_THAN) {
|
|
983
|
+
state = STATE_COMMENT_LESS_THAN_SIGN;
|
|
984
|
+
pos++;
|
|
985
|
+
} else if (cc === CC_HYPHEN_MINUS) {
|
|
986
|
+
// Consume the next input character:
|
|
987
|
+
// U+002D HYPHEN-MINUS (-)
|
|
988
|
+
// Switch to the comment end dash state.
|
|
989
|
+
state = STATE_COMMENT_END_DASH;
|
|
990
|
+
pos++;
|
|
991
|
+
} else {
|
|
992
|
+
pos++;
|
|
993
|
+
}
|
|
994
|
+
break;
|
|
995
|
+
|
|
996
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-end-dash-state
|
|
997
|
+
case STATE_COMMENT_END_DASH:
|
|
998
|
+
// Consume the next input character:
|
|
999
|
+
// U+002D HYPHEN-MINUS (-)
|
|
1000
|
+
// Switch to the comment end state.
|
|
1001
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
1002
|
+
state = STATE_COMMENT_END;
|
|
1003
|
+
pos++;
|
|
1004
|
+
} else {
|
|
1005
|
+
// Anything else
|
|
1006
|
+
// Append a U+002D HYPHEN-MINUS character (-) to the comment token's data.
|
|
1007
|
+
// Reconsume in the comment state.
|
|
1008
|
+
state = STATE_COMMENT;
|
|
1009
|
+
pos++;
|
|
1010
|
+
}
|
|
1011
|
+
break;
|
|
1012
|
+
|
|
1013
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-end-state
|
|
1014
|
+
case STATE_COMMENT_END:
|
|
1015
|
+
// Consume the next input character:
|
|
1016
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1017
|
+
// Switch to the data state. Emit the current comment token.
|
|
1018
|
+
if (cc === CC_GREATER_THAN) {
|
|
1019
|
+
let nextPos = pos + 1;
|
|
1020
|
+
if (callbacks.comment !== undefined) {
|
|
1021
|
+
nextPos = callbacks.comment(input, commentStart, pos + 1);
|
|
1022
|
+
}
|
|
1023
|
+
state = STATE_DATA;
|
|
1024
|
+
textStart = nextPos;
|
|
1025
|
+
pos = nextPos;
|
|
1026
|
+
} else if (cc === CC_EXCLAMATION_MARK) {
|
|
1027
|
+
// U+0021 EXCLAMATION MARK (!)
|
|
1028
|
+
// Switch to the comment end bang state.
|
|
1029
|
+
state = STATE_COMMENT_END_BANG;
|
|
1030
|
+
pos++;
|
|
1031
|
+
} else if (cc === CC_HYPHEN_MINUS) {
|
|
1032
|
+
pos++;
|
|
1033
|
+
} else {
|
|
1034
|
+
// Anything else
|
|
1035
|
+
// Append two U+002D HYPHEN-MINUS characters (-) to the comment token's
|
|
1036
|
+
// data. Reconsume in the comment state.
|
|
1037
|
+
state = STATE_COMMENT;
|
|
1038
|
+
pos++;
|
|
1039
|
+
}
|
|
1040
|
+
break;
|
|
1041
|
+
|
|
1042
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-end-bang-state
|
|
1043
|
+
case STATE_COMMENT_END_BANG:
|
|
1044
|
+
// Consume the next input character:
|
|
1045
|
+
// U+002D HYPHEN-MINUS (-)
|
|
1046
|
+
// Append two U+002D HYPHEN-MINUS characters (-) and a U+0021 EXCLAMATION
|
|
1047
|
+
// MARK character (!) to the comment token's data. Switch to the comment end
|
|
1048
|
+
// dash state.
|
|
1049
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
1050
|
+
state = STATE_COMMENT_END_DASH;
|
|
1051
|
+
pos++;
|
|
1052
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1053
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1054
|
+
// This is an incorrectly-closed-comment parse error. Switch to the data
|
|
1055
|
+
// state. Emit the current comment token.
|
|
1056
|
+
reportError("incorrectly-closed-comment", pos, pos + 1, "warning");
|
|
1057
|
+
let nextPos = pos + 1;
|
|
1058
|
+
if (callbacks.comment !== undefined) {
|
|
1059
|
+
nextPos = callbacks.comment(input, commentStart, pos + 1);
|
|
1060
|
+
}
|
|
1061
|
+
state = STATE_DATA;
|
|
1062
|
+
textStart = nextPos;
|
|
1063
|
+
pos = nextPos;
|
|
1064
|
+
} else {
|
|
1065
|
+
// Anything else
|
|
1066
|
+
// Append two U+002D HYPHEN-MINUS characters (-) and a U+0021 EXCLAMATION
|
|
1067
|
+
// MARK character (!) to the comment token's data. Reconsume in the comment
|
|
1068
|
+
// state.
|
|
1069
|
+
state = STATE_COMMENT;
|
|
1070
|
+
pos++;
|
|
1071
|
+
}
|
|
1072
|
+
break;
|
|
1073
|
+
|
|
1074
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state
|
|
1075
|
+
case STATE_BOGUS_COMMENT:
|
|
1076
|
+
// Consume the next input character:
|
|
1077
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1078
|
+
// Switch to the data state. Emit the current comment token.
|
|
1079
|
+
if (cc === CC_GREATER_THAN) {
|
|
1080
|
+
let nextPos = pos + 1;
|
|
1081
|
+
if (callbacks.comment !== undefined) {
|
|
1082
|
+
nextPos = callbacks.comment(input, commentStart, pos + 1);
|
|
1083
|
+
}
|
|
1084
|
+
state = STATE_DATA;
|
|
1085
|
+
textStart = nextPos;
|
|
1086
|
+
pos = nextPos;
|
|
1087
|
+
} else {
|
|
1088
|
+
pos++;
|
|
1089
|
+
}
|
|
1090
|
+
break;
|
|
1091
|
+
|
|
1092
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-state
|
|
1093
|
+
case STATE_COMMENT_LESS_THAN_SIGN:
|
|
1094
|
+
// Consume the next input character:
|
|
1095
|
+
// U+0021 EXCLAMATION MARK (!)
|
|
1096
|
+
// Append the current input character to the comment token's data. Switch to
|
|
1097
|
+
// the comment less-than sign bang state.
|
|
1098
|
+
if (cc === CC_EXCLAMATION_MARK) {
|
|
1099
|
+
state = STATE_COMMENT_LESS_THAN_SIGN_BANG;
|
|
1100
|
+
pos++;
|
|
1101
|
+
} else if (cc === CC_LESS_THAN) {
|
|
1102
|
+
// U+003C LESS-THAN SIGN (<)
|
|
1103
|
+
// Append the current input character to the comment token's data.
|
|
1104
|
+
pos++;
|
|
1105
|
+
} else {
|
|
1106
|
+
// Anything else
|
|
1107
|
+
// Reconsume in the comment state.
|
|
1108
|
+
state = STATE_COMMENT;
|
|
1109
|
+
// Reconsume
|
|
1110
|
+
}
|
|
1111
|
+
break;
|
|
1112
|
+
|
|
1113
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-bang-state
|
|
1114
|
+
case STATE_COMMENT_LESS_THAN_SIGN_BANG:
|
|
1115
|
+
// Consume the next input character:
|
|
1116
|
+
// U+002D HYPHEN-MINUS (-)
|
|
1117
|
+
// Switch to the comment less-than sign bang dash state.
|
|
1118
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
1119
|
+
state = STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH;
|
|
1120
|
+
pos++;
|
|
1121
|
+
} else {
|
|
1122
|
+
// Anything else
|
|
1123
|
+
// Reconsume in the comment state.
|
|
1124
|
+
state = STATE_COMMENT;
|
|
1125
|
+
// Reconsume
|
|
1126
|
+
}
|
|
1127
|
+
break;
|
|
1128
|
+
|
|
1129
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-bang-dash-state
|
|
1130
|
+
case STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH:
|
|
1131
|
+
// Consume the next input character:
|
|
1132
|
+
// U+002D HYPHEN-MINUS (-)
|
|
1133
|
+
// Switch to the comment less-than sign bang dash dash state.
|
|
1134
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
1135
|
+
state = STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH;
|
|
1136
|
+
pos++;
|
|
1137
|
+
} else {
|
|
1138
|
+
// Anything else
|
|
1139
|
+
// Reconsume in the comment end dash state.
|
|
1140
|
+
state = STATE_COMMENT_END_DASH;
|
|
1141
|
+
// Reconsume
|
|
1142
|
+
}
|
|
1143
|
+
break;
|
|
1144
|
+
|
|
1145
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-bang-dash-dash-state
|
|
1146
|
+
case STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH:
|
|
1147
|
+
// Consume the next input character:
|
|
1148
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1149
|
+
// EOF
|
|
1150
|
+
// Reconsume in the comment end state.
|
|
1151
|
+
// Anything else
|
|
1152
|
+
// This is a nested-comment parse error. Reconsume in the comment end state.
|
|
1153
|
+
if (cc !== CC_GREATER_THAN) {
|
|
1154
|
+
reportError("nested-comment", pos, pos + 1, "warning");
|
|
1155
|
+
}
|
|
1156
|
+
state = STATE_COMMENT_END;
|
|
1157
|
+
// Reconsume
|
|
1158
|
+
break;
|
|
1159
|
+
|
|
1160
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-state
|
|
1161
|
+
case STATE_DOCTYPE:
|
|
1162
|
+
// Consume the next input character:
|
|
1163
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1164
|
+
// U+000A LINE FEED (LF)
|
|
1165
|
+
// U+000C FORM FEED (FF)
|
|
1166
|
+
// U+0020 SPACE
|
|
1167
|
+
// Switch to the before DOCTYPE name state.
|
|
1168
|
+
if (isSpace(cc)) {
|
|
1169
|
+
state = STATE_BEFORE_DOCTYPE_NAME;
|
|
1170
|
+
pos++;
|
|
1171
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1172
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1173
|
+
// Reconsume in the before DOCTYPE name state.
|
|
1174
|
+
state = STATE_BEFORE_DOCTYPE_NAME;
|
|
1175
|
+
} else {
|
|
1176
|
+
// Anything else
|
|
1177
|
+
// This is a missing-whitespace-before-doctype-name parse error. Reconsume
|
|
1178
|
+
// in the before DOCTYPE name state.
|
|
1179
|
+
reportError(
|
|
1180
|
+
"missing-whitespace-before-doctype-name",
|
|
1181
|
+
pos,
|
|
1182
|
+
pos + 1,
|
|
1183
|
+
"warning"
|
|
1184
|
+
);
|
|
1185
|
+
state = STATE_BEFORE_DOCTYPE_NAME;
|
|
1186
|
+
}
|
|
1187
|
+
break;
|
|
1188
|
+
|
|
1189
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-name-state
|
|
1190
|
+
case STATE_BEFORE_DOCTYPE_NAME:
|
|
1191
|
+
// Consume the next input character:
|
|
1192
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1193
|
+
// U+000A LINE FEED (LF)
|
|
1194
|
+
// U+000C FORM FEED (FF)
|
|
1195
|
+
// U+0020 SPACE
|
|
1196
|
+
// Ignore the character.
|
|
1197
|
+
if (isSpace(cc)) {
|
|
1198
|
+
pos++;
|
|
1199
|
+
} else if (cc === 0x00) {
|
|
1200
|
+
// U+0000 NULL
|
|
1201
|
+
// This is an unexpected-null-character parse error. Create a new DOCTYPE
|
|
1202
|
+
// token. Set the token's name to a U+FFFD REPLACEMENT CHARACTER character.
|
|
1203
|
+
// Switch to the DOCTYPE name state.
|
|
1204
|
+
state = STATE_DOCTYPE_NAME;
|
|
1205
|
+
pos++;
|
|
1206
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1207
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1208
|
+
// This is a missing-doctype-name parse error. Create a new DOCTYPE token.
|
|
1209
|
+
// Set its force-quirks flag to on. Switch to the data state. Emit the
|
|
1210
|
+
// current token.
|
|
1211
|
+
reportError("missing-doctype-name", pos, pos + 1, "warning");
|
|
1212
|
+
let nextPos = pos + 1;
|
|
1213
|
+
if (callbacks.doctype !== undefined) {
|
|
1214
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1215
|
+
}
|
|
1216
|
+
state = STATE_DATA;
|
|
1217
|
+
textStart = nextPos;
|
|
1218
|
+
pos = nextPos;
|
|
1219
|
+
} else {
|
|
1220
|
+
// ASCII upper alpha
|
|
1221
|
+
// Create a new DOCTYPE token. Set the token's name to the lowercase version
|
|
1222
|
+
// of the current input character (add 0x0020 to the character's code
|
|
1223
|
+
// point). Switch to the DOCTYPE name state.
|
|
1224
|
+
// Anything else
|
|
1225
|
+
// Create a new DOCTYPE token. Set the token's name to the current input
|
|
1226
|
+
// character. Switch to the DOCTYPE name state.
|
|
1227
|
+
state = STATE_DOCTYPE_NAME;
|
|
1228
|
+
pos++;
|
|
1229
|
+
}
|
|
1230
|
+
break;
|
|
1231
|
+
|
|
1232
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-name-state
|
|
1233
|
+
case STATE_DOCTYPE_NAME:
|
|
1234
|
+
// Consume the next input character:
|
|
1235
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1236
|
+
// U+000A LINE FEED (LF)
|
|
1237
|
+
// U+000C FORM FEED (FF)
|
|
1238
|
+
// U+0020 SPACE
|
|
1239
|
+
// Switch to the after DOCTYPE name state.
|
|
1240
|
+
if (isSpace(cc)) {
|
|
1241
|
+
state = STATE_AFTER_DOCTYPE_NAME;
|
|
1242
|
+
pos++;
|
|
1243
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1244
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1245
|
+
// Switch to the data state. Emit the current DOCTYPE token.
|
|
1246
|
+
let nextPos = pos + 1;
|
|
1247
|
+
if (callbacks.doctype !== undefined) {
|
|
1248
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1249
|
+
}
|
|
1250
|
+
state = STATE_DATA;
|
|
1251
|
+
textStart = nextPos;
|
|
1252
|
+
pos = nextPos;
|
|
1253
|
+
} else if (cc === 0x00) {
|
|
1254
|
+
// U+0000 NULL
|
|
1255
|
+
// This is an unexpected-null-character parse error. Append a U+FFFD
|
|
1256
|
+
// REPLACEMENT CHARACTER character to the current DOCTYPE token's name.
|
|
1257
|
+
pos++;
|
|
1258
|
+
} else {
|
|
1259
|
+
// ASCII upper alpha
|
|
1260
|
+
// Append the lowercase version of the current input character (add 0x0020
|
|
1261
|
+
// to the character's code point) to the current DOCTYPE token's name.
|
|
1262
|
+
// Anything else
|
|
1263
|
+
// Append the current input character to the current DOCTYPE token's name.
|
|
1264
|
+
pos++;
|
|
1265
|
+
}
|
|
1266
|
+
break;
|
|
1267
|
+
|
|
1268
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-name-state
|
|
1269
|
+
case STATE_AFTER_DOCTYPE_NAME:
|
|
1270
|
+
// Consume the next input character:
|
|
1271
|
+
if (isSpace(cc)) {
|
|
1272
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1273
|
+
// U+000A LINE FEED (LF)
|
|
1274
|
+
// U+000C FORM FEED (FF)
|
|
1275
|
+
// U+0020 SPACE
|
|
1276
|
+
// Ignore the character.
|
|
1277
|
+
pos++;
|
|
1278
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1279
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1280
|
+
// Switch to the data state. Emit the current DOCTYPE token.
|
|
1281
|
+
let nextPos = pos + 1;
|
|
1282
|
+
if (callbacks.doctype !== undefined) {
|
|
1283
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1284
|
+
}
|
|
1285
|
+
state = STATE_DATA;
|
|
1286
|
+
textStart = nextPos;
|
|
1287
|
+
pos = nextPos;
|
|
1288
|
+
} else if (
|
|
1289
|
+
pos + 5 < len &&
|
|
1290
|
+
(cc === 0x50 || cc === 0x70) /* P or p */ &&
|
|
1291
|
+
(input.charCodeAt(pos + 1) | 0x20) === 0x75 /* u */ &&
|
|
1292
|
+
(input.charCodeAt(pos + 2) | 0x20) === 0x62 /* b */ &&
|
|
1293
|
+
(input.charCodeAt(pos + 3) | 0x20) === 0x6c /* l */ &&
|
|
1294
|
+
(input.charCodeAt(pos + 4) | 0x20) === 0x69 /* i */ &&
|
|
1295
|
+
(input.charCodeAt(pos + 5) | 0x20) === 0x63 /* c */
|
|
1296
|
+
) {
|
|
1297
|
+
// ASCII case-insensitive match for the word "PUBLIC"
|
|
1298
|
+
pos += 6;
|
|
1299
|
+
state = STATE_AFTER_DOCTYPE_PUBLIC_KEYWORD;
|
|
1300
|
+
} else if (
|
|
1301
|
+
pos + 5 < len &&
|
|
1302
|
+
(cc === 0x53 || cc === 0x73) /* S or s */ &&
|
|
1303
|
+
(input.charCodeAt(pos + 1) | 0x20) === 0x79 /* y */ &&
|
|
1304
|
+
(input.charCodeAt(pos + 2) | 0x20) === 0x73 /* s */ &&
|
|
1305
|
+
(input.charCodeAt(pos + 3) | 0x20) === 0x74 /* t */ &&
|
|
1306
|
+
(input.charCodeAt(pos + 4) | 0x20) === 0x65 /* e */ &&
|
|
1307
|
+
(input.charCodeAt(pos + 5) | 0x20) === 0x6d /* m */
|
|
1308
|
+
) {
|
|
1309
|
+
// ASCII case-insensitive match for the word "SYSTEM"
|
|
1310
|
+
pos += 6;
|
|
1311
|
+
state = STATE_AFTER_DOCTYPE_SYSTEM_KEYWORD;
|
|
1312
|
+
} else {
|
|
1313
|
+
// Anything else
|
|
1314
|
+
// This is an invalid-character-sequence-after-doctype-name parse error. Set
|
|
1315
|
+
// the current DOCTYPE token's force-quirks flag to on. Reconsume in the
|
|
1316
|
+
// bogus DOCTYPE state.
|
|
1317
|
+
reportError(
|
|
1318
|
+
"invalid-character-sequence-after-doctype-name",
|
|
1319
|
+
pos,
|
|
1320
|
+
pos + 1,
|
|
1321
|
+
"warning"
|
|
1322
|
+
);
|
|
1323
|
+
state = STATE_BOGUS_DOCTYPE;
|
|
1324
|
+
}
|
|
1325
|
+
break;
|
|
1326
|
+
|
|
1327
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-public-keyword-state
|
|
1328
|
+
case STATE_AFTER_DOCTYPE_PUBLIC_KEYWORD:
|
|
1329
|
+
// Consume the next input character:
|
|
1330
|
+
if (isSpace(cc)) {
|
|
1331
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1332
|
+
// U+000A LINE FEED (LF)
|
|
1333
|
+
// U+000C FORM FEED (FF)
|
|
1334
|
+
// U+0020 SPACE
|
|
1335
|
+
// Switch to the before DOCTYPE public identifier state.
|
|
1336
|
+
state = STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
|
|
1337
|
+
pos++;
|
|
1338
|
+
} else if (cc === CC_QUOTATION_MARK) {
|
|
1339
|
+
// U+0022 QUOTATION MARK (")
|
|
1340
|
+
// This is a missing-whitespace-after-doctype-public-keyword parse error.
|
|
1341
|
+
// Set the current DOCTYPE token's public identifier to the empty string
|
|
1342
|
+
// (not missing), then switch to the DOCTYPE public identifier
|
|
1343
|
+
// (double-quoted) state.
|
|
1344
|
+
reportError(
|
|
1345
|
+
"missing-whitespace-after-doctype-public-keyword",
|
|
1346
|
+
pos,
|
|
1347
|
+
pos + 1,
|
|
1348
|
+
"warning"
|
|
1349
|
+
);
|
|
1350
|
+
state = STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
|
|
1351
|
+
pos++;
|
|
1352
|
+
} else if (cc === CC_APOSTROPHE) {
|
|
1353
|
+
// U+0027 APOSTROPHE (')
|
|
1354
|
+
// This is a missing-whitespace-after-doctype-public-keyword parse error.
|
|
1355
|
+
// Set the current DOCTYPE token's public identifier to the empty string
|
|
1356
|
+
// (not missing), then switch to the DOCTYPE public identifier
|
|
1357
|
+
// (single-quoted) state.
|
|
1358
|
+
reportError(
|
|
1359
|
+
"missing-whitespace-after-doctype-public-keyword",
|
|
1360
|
+
pos,
|
|
1361
|
+
pos + 1,
|
|
1362
|
+
"warning"
|
|
1363
|
+
);
|
|
1364
|
+
state = STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
|
|
1365
|
+
pos++;
|
|
1366
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1367
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1368
|
+
// This is a missing-doctype-public-identifier parse error. Set the current
|
|
1369
|
+
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
|
|
1370
|
+
// the current DOCTYPE token.
|
|
1371
|
+
reportError(
|
|
1372
|
+
"missing-doctype-public-identifier",
|
|
1373
|
+
pos,
|
|
1374
|
+
pos + 1,
|
|
1375
|
+
"warning"
|
|
1376
|
+
);
|
|
1377
|
+
let nextPos = pos + 1;
|
|
1378
|
+
if (callbacks.doctype !== undefined) {
|
|
1379
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1380
|
+
}
|
|
1381
|
+
state = STATE_DATA;
|
|
1382
|
+
textStart = nextPos;
|
|
1383
|
+
pos = nextPos;
|
|
1384
|
+
} else {
|
|
1385
|
+
// Anything else
|
|
1386
|
+
// This is a missing-quote-before-doctype-public-identifier parse error. Set
|
|
1387
|
+
// the current DOCTYPE token's force-quirks flag to on. Reconsume in the
|
|
1388
|
+
// bogus DOCTYPE state.
|
|
1389
|
+
reportError(
|
|
1390
|
+
"missing-quote-before-doctype-public-identifier",
|
|
1391
|
+
pos,
|
|
1392
|
+
pos + 1,
|
|
1393
|
+
"warning"
|
|
1394
|
+
);
|
|
1395
|
+
state = STATE_BOGUS_DOCTYPE;
|
|
1396
|
+
}
|
|
1397
|
+
break;
|
|
1398
|
+
|
|
1399
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-public-identifier-state
|
|
1400
|
+
case STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
|
|
1401
|
+
// Consume the next input character:
|
|
1402
|
+
if (isSpace(cc)) {
|
|
1403
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1404
|
+
// U+000A LINE FEED (LF)
|
|
1405
|
+
// U+000C FORM FEED (FF)
|
|
1406
|
+
// U+0020 SPACE
|
|
1407
|
+
// Ignore the character.
|
|
1408
|
+
pos++;
|
|
1409
|
+
} else if (cc === CC_QUOTATION_MARK) {
|
|
1410
|
+
// U+0022 QUOTATION MARK (")
|
|
1411
|
+
// Set the current DOCTYPE token's public identifier to the empty string
|
|
1412
|
+
// (not missing), then switch to the DOCTYPE public identifier
|
|
1413
|
+
// (double-quoted) state.
|
|
1414
|
+
state = STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
|
|
1415
|
+
pos++;
|
|
1416
|
+
} else if (cc === CC_APOSTROPHE) {
|
|
1417
|
+
// U+0027 APOSTROPHE (')
|
|
1418
|
+
// Set the current DOCTYPE token's public identifier to the empty string
|
|
1419
|
+
// (not missing), then switch to the DOCTYPE public identifier
|
|
1420
|
+
// (single-quoted) state.
|
|
1421
|
+
state = STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
|
|
1422
|
+
pos++;
|
|
1423
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1424
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1425
|
+
// This is a missing-doctype-public-identifier parse error. Set the current
|
|
1426
|
+
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
|
|
1427
|
+
// the current DOCTYPE token.
|
|
1428
|
+
reportError(
|
|
1429
|
+
"missing-doctype-public-identifier",
|
|
1430
|
+
pos,
|
|
1431
|
+
pos + 1,
|
|
1432
|
+
"warning"
|
|
1433
|
+
);
|
|
1434
|
+
let nextPos = pos + 1;
|
|
1435
|
+
if (callbacks.doctype !== undefined) {
|
|
1436
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1437
|
+
}
|
|
1438
|
+
state = STATE_DATA;
|
|
1439
|
+
textStart = nextPos;
|
|
1440
|
+
pos = nextPos;
|
|
1441
|
+
} else {
|
|
1442
|
+
// Anything else
|
|
1443
|
+
// This is a missing-quote-before-doctype-public-identifier parse error. Set
|
|
1444
|
+
// the current DOCTYPE token's force-quirks flag to on. Reconsume in the
|
|
1445
|
+
// bogus DOCTYPE state.
|
|
1446
|
+
reportError(
|
|
1447
|
+
"missing-quote-before-doctype-public-identifier",
|
|
1448
|
+
pos,
|
|
1449
|
+
pos + 1,
|
|
1450
|
+
"warning"
|
|
1451
|
+
);
|
|
1452
|
+
state = STATE_BOGUS_DOCTYPE;
|
|
1453
|
+
}
|
|
1454
|
+
break;
|
|
1455
|
+
|
|
1456
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-public-identifier-(double-quoted)-state
|
|
1457
|
+
case STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
|
|
1458
|
+
// Consume the next input character:
|
|
1459
|
+
if (cc === CC_QUOTATION_MARK) {
|
|
1460
|
+
// U+0022 QUOTATION MARK (")
|
|
1461
|
+
// Switch to the after DOCTYPE public identifier state.
|
|
1462
|
+
state = STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
|
|
1463
|
+
pos++;
|
|
1464
|
+
} else if (cc === 0x00) {
|
|
1465
|
+
// U+0000 NULL
|
|
1466
|
+
// This is an unexpected-null-character parse error. Append a U+FFFD
|
|
1467
|
+
// REPLACEMENT CHARACTER character to the current DOCTYPE token's public
|
|
1468
|
+
// identifier.
|
|
1469
|
+
pos++;
|
|
1470
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1471
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1472
|
+
// This is an abrupt-doctype-public-identifier parse error. Set the current
|
|
1473
|
+
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
|
|
1474
|
+
// the current DOCTYPE token.
|
|
1475
|
+
reportError(
|
|
1476
|
+
"abrupt-doctype-public-identifier",
|
|
1477
|
+
pos,
|
|
1478
|
+
pos + 1,
|
|
1479
|
+
"warning"
|
|
1480
|
+
);
|
|
1481
|
+
let nextPos = pos + 1;
|
|
1482
|
+
if (callbacks.doctype !== undefined) {
|
|
1483
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1484
|
+
}
|
|
1485
|
+
state = STATE_DATA;
|
|
1486
|
+
textStart = nextPos;
|
|
1487
|
+
pos = nextPos;
|
|
1488
|
+
} else {
|
|
1489
|
+
// Anything else
|
|
1490
|
+
// Append the current input character to the current DOCTYPE token's public
|
|
1491
|
+
// identifier.
|
|
1492
|
+
pos++;
|
|
1493
|
+
}
|
|
1494
|
+
break;
|
|
1495
|
+
|
|
1496
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-public-identifier-(single-quoted)-state
|
|
1497
|
+
case STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
|
|
1498
|
+
// Consume the next input character:
|
|
1499
|
+
if (cc === CC_APOSTROPHE) {
|
|
1500
|
+
// U+0027 APOSTROPHE (')
|
|
1501
|
+
// Switch to the after DOCTYPE public identifier state.
|
|
1502
|
+
state = STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
|
|
1503
|
+
pos++;
|
|
1504
|
+
} else if (cc === 0x00) {
|
|
1505
|
+
// U+0000 NULL
|
|
1506
|
+
// This is an unexpected-null-character parse error. Append a U+FFFD
|
|
1507
|
+
// REPLACEMENT CHARACTER character to the current DOCTYPE token's public
|
|
1508
|
+
// identifier.
|
|
1509
|
+
pos++;
|
|
1510
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1511
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1512
|
+
// This is an abrupt-doctype-public-identifier parse error. Set the current
|
|
1513
|
+
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
|
|
1514
|
+
// the current DOCTYPE token.
|
|
1515
|
+
reportError(
|
|
1516
|
+
"abrupt-doctype-public-identifier",
|
|
1517
|
+
pos,
|
|
1518
|
+
pos + 1,
|
|
1519
|
+
"warning"
|
|
1520
|
+
);
|
|
1521
|
+
let nextPos = pos + 1;
|
|
1522
|
+
if (callbacks.doctype !== undefined) {
|
|
1523
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1524
|
+
}
|
|
1525
|
+
state = STATE_DATA;
|
|
1526
|
+
textStart = nextPos;
|
|
1527
|
+
pos = nextPos;
|
|
1528
|
+
} else {
|
|
1529
|
+
// Anything else
|
|
1530
|
+
// Append the current input character to the current DOCTYPE token's public
|
|
1531
|
+
// identifier.
|
|
1532
|
+
pos++;
|
|
1533
|
+
}
|
|
1534
|
+
break;
|
|
1535
|
+
|
|
1536
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-public-identifier-state
|
|
1537
|
+
case STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
|
|
1538
|
+
// Consume the next input character:
|
|
1539
|
+
if (isSpace(cc)) {
|
|
1540
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1541
|
+
// U+000A LINE FEED (LF)
|
|
1542
|
+
// U+000C FORM FEED (FF)
|
|
1543
|
+
// U+0020 SPACE
|
|
1544
|
+
// Switch to the between DOCTYPE public and system identifiers state.
|
|
1545
|
+
state = STATE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;
|
|
1546
|
+
pos++;
|
|
1547
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1548
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1549
|
+
// Switch to the data state. Emit the current DOCTYPE token.
|
|
1550
|
+
let nextPos = pos + 1;
|
|
1551
|
+
if (callbacks.doctype !== undefined) {
|
|
1552
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1553
|
+
}
|
|
1554
|
+
state = STATE_DATA;
|
|
1555
|
+
textStart = nextPos;
|
|
1556
|
+
pos = nextPos;
|
|
1557
|
+
} else if (cc === CC_QUOTATION_MARK) {
|
|
1558
|
+
// U+0022 QUOTATION MARK (")
|
|
1559
|
+
// This is a missing-whitespace-between-doctype-public-and-system-identifiers
|
|
1560
|
+
// parse error. Set the current DOCTYPE token's system
|
|
1561
|
+
// identifier to the empty string (not missing), then switch
|
|
1562
|
+
// to the DOCTYPE system identifier (double-quoted) state.
|
|
1563
|
+
reportError(
|
|
1564
|
+
"missing-whitespace-between-doctype-public-and-system-identifiers",
|
|
1565
|
+
pos,
|
|
1566
|
+
pos + 1,
|
|
1567
|
+
"warning"
|
|
1568
|
+
);
|
|
1569
|
+
state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
|
|
1570
|
+
pos++;
|
|
1571
|
+
} else if (cc === CC_APOSTROPHE) {
|
|
1572
|
+
// U+0027 APOSTROPHE (')
|
|
1573
|
+
// This is a missing-whitespace-between-doctype-public-and-system-identifiers
|
|
1574
|
+
// parse error. Set the current DOCTYPE token's system
|
|
1575
|
+
// identifier to the empty string (not missing), then switch
|
|
1576
|
+
// to the DOCTYPE system identifier (single-quoted) state.
|
|
1577
|
+
reportError(
|
|
1578
|
+
"missing-whitespace-between-doctype-public-and-system-identifiers",
|
|
1579
|
+
pos,
|
|
1580
|
+
pos + 1,
|
|
1581
|
+
"warning"
|
|
1582
|
+
);
|
|
1583
|
+
state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
|
|
1584
|
+
pos++;
|
|
1585
|
+
} else {
|
|
1586
|
+
// Anything else
|
|
1587
|
+
// This is a missing-quote-before-doctype-system-identifier parse error. Set
|
|
1588
|
+
// the current DOCTYPE token's force-quirks flag to on. Reconsume in the
|
|
1589
|
+
// bogus DOCTYPE state.
|
|
1590
|
+
reportError(
|
|
1591
|
+
"missing-quote-before-doctype-system-identifier",
|
|
1592
|
+
pos,
|
|
1593
|
+
pos + 1,
|
|
1594
|
+
"warning"
|
|
1595
|
+
);
|
|
1596
|
+
state = STATE_BOGUS_DOCTYPE;
|
|
1597
|
+
}
|
|
1598
|
+
break;
|
|
1599
|
+
|
|
1600
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#between-doctype-public-and-system-identifiers-state
|
|
1601
|
+
case STATE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
|
|
1602
|
+
// Consume the next input character:
|
|
1603
|
+
if (isSpace(cc)) {
|
|
1604
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1605
|
+
// U+000A LINE FEED (LF)
|
|
1606
|
+
// U+000C FORM FEED (FF)
|
|
1607
|
+
// U+0020 SPACE
|
|
1608
|
+
// Ignore the character.
|
|
1609
|
+
pos++;
|
|
1610
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1611
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1612
|
+
// Switch to the data state. Emit the current DOCTYPE token.
|
|
1613
|
+
let nextPos = pos + 1;
|
|
1614
|
+
if (callbacks.doctype !== undefined) {
|
|
1615
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1616
|
+
}
|
|
1617
|
+
state = STATE_DATA;
|
|
1618
|
+
textStart = nextPos;
|
|
1619
|
+
pos = nextPos;
|
|
1620
|
+
} else if (cc === CC_QUOTATION_MARK) {
|
|
1621
|
+
// U+0022 QUOTATION MARK (")
|
|
1622
|
+
// Set the current DOCTYPE token's system identifier to the empty string
|
|
1623
|
+
// (not missing), then switch to the DOCTYPE system identifier
|
|
1624
|
+
// (double-quoted) state.
|
|
1625
|
+
state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
|
|
1626
|
+
pos++;
|
|
1627
|
+
} else if (cc === CC_APOSTROPHE) {
|
|
1628
|
+
// U+0027 APOSTROPHE (')
|
|
1629
|
+
// Set the current DOCTYPE token's system identifier to the empty string
|
|
1630
|
+
// (not missing), then switch to the DOCTYPE system identifier
|
|
1631
|
+
// (single-quoted) state.
|
|
1632
|
+
state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
|
|
1633
|
+
pos++;
|
|
1634
|
+
} else {
|
|
1635
|
+
// Anything else
|
|
1636
|
+
// This is a missing-quote-before-doctype-system-identifier parse error. Set
|
|
1637
|
+
// the current DOCTYPE token's force-quirks flag to on. Reconsume in the
|
|
1638
|
+
// bogus DOCTYPE state.
|
|
1639
|
+
reportError(
|
|
1640
|
+
"missing-quote-before-doctype-system-identifier",
|
|
1641
|
+
pos,
|
|
1642
|
+
pos + 1,
|
|
1643
|
+
"warning"
|
|
1644
|
+
);
|
|
1645
|
+
state = STATE_BOGUS_DOCTYPE;
|
|
1646
|
+
}
|
|
1647
|
+
break;
|
|
1648
|
+
|
|
1649
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-system-keyword-state
|
|
1650
|
+
case STATE_AFTER_DOCTYPE_SYSTEM_KEYWORD:
|
|
1651
|
+
// Consume the next input character:
|
|
1652
|
+
if (isSpace(cc)) {
|
|
1653
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1654
|
+
// U+000A LINE FEED (LF)
|
|
1655
|
+
// U+000C FORM FEED (FF)
|
|
1656
|
+
// U+0020 SPACE
|
|
1657
|
+
// Switch to the before DOCTYPE system identifier state.
|
|
1658
|
+
state = STATE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;
|
|
1659
|
+
pos++;
|
|
1660
|
+
} else if (cc === CC_QUOTATION_MARK) {
|
|
1661
|
+
// U+0022 QUOTATION MARK (")
|
|
1662
|
+
// This is a missing-whitespace-after-doctype-system-keyword parse error.
|
|
1663
|
+
// Set the current DOCTYPE token's system identifier to the empty string
|
|
1664
|
+
// (not missing), then switch to the DOCTYPE system identifier
|
|
1665
|
+
// (double-quoted) state.
|
|
1666
|
+
reportError(
|
|
1667
|
+
"missing-whitespace-after-doctype-system-keyword",
|
|
1668
|
+
pos,
|
|
1669
|
+
pos + 1,
|
|
1670
|
+
"warning"
|
|
1671
|
+
);
|
|
1672
|
+
state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
|
|
1673
|
+
pos++;
|
|
1674
|
+
} else if (cc === CC_APOSTROPHE) {
|
|
1675
|
+
// U+0027 APOSTROPHE (')
|
|
1676
|
+
// This is a missing-whitespace-after-doctype-system-keyword parse error.
|
|
1677
|
+
// Set the current DOCTYPE token's system identifier to the empty string
|
|
1678
|
+
// (not missing), then switch to the DOCTYPE system identifier
|
|
1679
|
+
// (single-quoted) state.
|
|
1680
|
+
reportError(
|
|
1681
|
+
"missing-whitespace-after-doctype-system-keyword",
|
|
1682
|
+
pos,
|
|
1683
|
+
pos + 1,
|
|
1684
|
+
"warning"
|
|
1685
|
+
);
|
|
1686
|
+
state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
|
|
1687
|
+
pos++;
|
|
1688
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1689
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1690
|
+
// This is a missing-doctype-system-identifier parse error. Set the current
|
|
1691
|
+
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
|
|
1692
|
+
// the current DOCTYPE token.
|
|
1693
|
+
reportError(
|
|
1694
|
+
"missing-doctype-system-identifier",
|
|
1695
|
+
pos,
|
|
1696
|
+
pos + 1,
|
|
1697
|
+
"warning"
|
|
1698
|
+
);
|
|
1699
|
+
let nextPos = pos + 1;
|
|
1700
|
+
if (callbacks.doctype !== undefined) {
|
|
1701
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1702
|
+
}
|
|
1703
|
+
state = STATE_DATA;
|
|
1704
|
+
textStart = nextPos;
|
|
1705
|
+
pos = nextPos;
|
|
1706
|
+
} else {
|
|
1707
|
+
// Anything else
|
|
1708
|
+
// This is a missing-quote-before-doctype-system-identifier parse error. Set
|
|
1709
|
+
// the current DOCTYPE token's force-quirks flag to on. Reconsume in the
|
|
1710
|
+
// bogus DOCTYPE state.
|
|
1711
|
+
reportError(
|
|
1712
|
+
"missing-quote-before-doctype-system-identifier",
|
|
1713
|
+
pos,
|
|
1714
|
+
pos + 1,
|
|
1715
|
+
"warning"
|
|
1716
|
+
);
|
|
1717
|
+
state = STATE_BOGUS_DOCTYPE;
|
|
1718
|
+
}
|
|
1719
|
+
break;
|
|
1720
|
+
|
|
1721
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-system-identifier-state
|
|
1722
|
+
case STATE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
|
|
1723
|
+
// Consume the next input character:
|
|
1724
|
+
if (isSpace(cc)) {
|
|
1725
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1726
|
+
// U+000A LINE FEED (LF)
|
|
1727
|
+
// U+000C FORM FEED (FF)
|
|
1728
|
+
// U+0020 SPACE
|
|
1729
|
+
// Ignore the character.
|
|
1730
|
+
pos++;
|
|
1731
|
+
} else if (cc === CC_QUOTATION_MARK) {
|
|
1732
|
+
// U+0022 QUOTATION MARK (")
|
|
1733
|
+
// Set the current DOCTYPE token's system identifier to the empty string
|
|
1734
|
+
// (not missing), then switch to the DOCTYPE system identifier
|
|
1735
|
+
// (double-quoted) state.
|
|
1736
|
+
state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
|
|
1737
|
+
pos++;
|
|
1738
|
+
} else if (cc === CC_APOSTROPHE) {
|
|
1739
|
+
// U+0027 APOSTROPHE (')
|
|
1740
|
+
// Set the current DOCTYPE token's system identifier to the empty string
|
|
1741
|
+
// (not missing), then switch to the DOCTYPE system identifier
|
|
1742
|
+
// (single-quoted) state.
|
|
1743
|
+
state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
|
|
1744
|
+
pos++;
|
|
1745
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1746
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1747
|
+
// This is a missing-doctype-system-identifier parse error. Set the current
|
|
1748
|
+
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
|
|
1749
|
+
// the current DOCTYPE token.
|
|
1750
|
+
reportError(
|
|
1751
|
+
"missing-doctype-system-identifier",
|
|
1752
|
+
pos,
|
|
1753
|
+
pos + 1,
|
|
1754
|
+
"warning"
|
|
1755
|
+
);
|
|
1756
|
+
let nextPos = pos + 1;
|
|
1757
|
+
if (callbacks.doctype !== undefined) {
|
|
1758
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1759
|
+
}
|
|
1760
|
+
state = STATE_DATA;
|
|
1761
|
+
textStart = nextPos;
|
|
1762
|
+
pos = nextPos;
|
|
1763
|
+
} else {
|
|
1764
|
+
// Anything else
|
|
1765
|
+
// This is a missing-quote-before-doctype-system-identifier parse error. Set
|
|
1766
|
+
// the current DOCTYPE token's force-quirks flag to on. Reconsume in the
|
|
1767
|
+
// bogus DOCTYPE state.
|
|
1768
|
+
reportError(
|
|
1769
|
+
"missing-quote-before-doctype-system-identifier",
|
|
1770
|
+
pos,
|
|
1771
|
+
pos + 1,
|
|
1772
|
+
"warning"
|
|
1773
|
+
);
|
|
1774
|
+
state = STATE_BOGUS_DOCTYPE;
|
|
1775
|
+
}
|
|
1776
|
+
break;
|
|
1777
|
+
|
|
1778
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-system-identifier-(double-quoted)-state
|
|
1779
|
+
case STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
|
|
1780
|
+
// Consume the next input character:
|
|
1781
|
+
if (cc === CC_QUOTATION_MARK) {
|
|
1782
|
+
// U+0022 QUOTATION MARK (")
|
|
1783
|
+
// Switch to the after DOCTYPE system identifier state.
|
|
1784
|
+
state = STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
|
|
1785
|
+
pos++;
|
|
1786
|
+
} else if (cc === 0x00) {
|
|
1787
|
+
// U+0000 NULL
|
|
1788
|
+
// This is an unexpected-null-character parse error. Append a U+FFFD
|
|
1789
|
+
// REPLACEMENT CHARACTER character to the current DOCTYPE token's system
|
|
1790
|
+
// identifier.
|
|
1791
|
+
pos++;
|
|
1792
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1793
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1794
|
+
// This is an abrupt-doctype-system-identifier parse error. Set the current
|
|
1795
|
+
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
|
|
1796
|
+
// the current DOCTYPE token.
|
|
1797
|
+
reportError(
|
|
1798
|
+
"abrupt-doctype-system-identifier",
|
|
1799
|
+
pos,
|
|
1800
|
+
pos + 1,
|
|
1801
|
+
"warning"
|
|
1802
|
+
);
|
|
1803
|
+
let nextPos = pos + 1;
|
|
1804
|
+
if (callbacks.doctype !== undefined) {
|
|
1805
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1806
|
+
}
|
|
1807
|
+
state = STATE_DATA;
|
|
1808
|
+
textStart = nextPos;
|
|
1809
|
+
pos = nextPos;
|
|
1810
|
+
} else {
|
|
1811
|
+
// Anything else
|
|
1812
|
+
// Append the current input character to the current DOCTYPE token's system
|
|
1813
|
+
// identifier.
|
|
1814
|
+
pos++;
|
|
1815
|
+
}
|
|
1816
|
+
break;
|
|
1817
|
+
|
|
1818
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-system-identifier-(single-quoted)-state
|
|
1819
|
+
case STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
|
|
1820
|
+
// Consume the next input character:
|
|
1821
|
+
if (cc === CC_APOSTROPHE) {
|
|
1822
|
+
// U+0027 APOSTROPHE (')
|
|
1823
|
+
// Switch to the after DOCTYPE system identifier state.
|
|
1824
|
+
state = STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
|
|
1825
|
+
pos++;
|
|
1826
|
+
} else if (cc === 0x00) {
|
|
1827
|
+
// U+0000 NULL
|
|
1828
|
+
// This is an unexpected-null-character parse error. Append a U+FFFD
|
|
1829
|
+
// REPLACEMENT CHARACTER character to the current DOCTYPE token's system
|
|
1830
|
+
// identifier.
|
|
1831
|
+
pos++;
|
|
1832
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1833
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1834
|
+
// This is an abrupt-doctype-system-identifier parse error. Set the current
|
|
1835
|
+
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
|
|
1836
|
+
// the current DOCTYPE token.
|
|
1837
|
+
reportError(
|
|
1838
|
+
"abrupt-doctype-system-identifier",
|
|
1839
|
+
pos,
|
|
1840
|
+
pos + 1,
|
|
1841
|
+
"warning"
|
|
1842
|
+
);
|
|
1843
|
+
let nextPos = pos + 1;
|
|
1844
|
+
if (callbacks.doctype !== undefined) {
|
|
1845
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1846
|
+
}
|
|
1847
|
+
state = STATE_DATA;
|
|
1848
|
+
textStart = nextPos;
|
|
1849
|
+
pos = nextPos;
|
|
1850
|
+
} else {
|
|
1851
|
+
// Anything else
|
|
1852
|
+
// Append the current input character to the current DOCTYPE token's system
|
|
1853
|
+
// identifier.
|
|
1854
|
+
pos++;
|
|
1855
|
+
}
|
|
1856
|
+
break;
|
|
1857
|
+
|
|
1858
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-system-identifier-state
|
|
1859
|
+
case STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
|
|
1860
|
+
// Consume the next input character:
|
|
1861
|
+
if (isSpace(cc)) {
|
|
1862
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1863
|
+
// U+000A LINE FEED (LF)
|
|
1864
|
+
// U+000C FORM FEED (FF)
|
|
1865
|
+
// U+0020 SPACE
|
|
1866
|
+
// Ignore the character.
|
|
1867
|
+
pos++;
|
|
1868
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1869
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1870
|
+
// Switch to the data state. Emit the current DOCTYPE token.
|
|
1871
|
+
let nextPos = pos + 1;
|
|
1872
|
+
if (callbacks.doctype !== undefined) {
|
|
1873
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1874
|
+
}
|
|
1875
|
+
state = STATE_DATA;
|
|
1876
|
+
textStart = nextPos;
|
|
1877
|
+
pos = nextPos;
|
|
1878
|
+
} else {
|
|
1879
|
+
// Anything else
|
|
1880
|
+
// This is an unexpected-character-after-doctype-system-identifier parse
|
|
1881
|
+
// error. Reconsume in the bogus DOCTYPE state. (This does not set the
|
|
1882
|
+
// current DOCTYPE token's force-quirks flag to on.)
|
|
1883
|
+
reportError(
|
|
1884
|
+
"unexpected-character-after-doctype-system-identifier",
|
|
1885
|
+
pos,
|
|
1886
|
+
pos + 1,
|
|
1887
|
+
"warning"
|
|
1888
|
+
);
|
|
1889
|
+
state = STATE_BOGUS_DOCTYPE;
|
|
1890
|
+
}
|
|
1891
|
+
break;
|
|
1892
|
+
|
|
1893
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#bogus-doctype-state
|
|
1894
|
+
case STATE_BOGUS_DOCTYPE:
|
|
1895
|
+
// Consume the next input character:
|
|
1896
|
+
if (cc === CC_GREATER_THAN) {
|
|
1897
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1898
|
+
// Switch to the data state. Emit the DOCTYPE token.
|
|
1899
|
+
let nextPos = pos + 1;
|
|
1900
|
+
if (callbacks.doctype !== undefined) {
|
|
1901
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1902
|
+
}
|
|
1903
|
+
state = STATE_DATA;
|
|
1904
|
+
textStart = nextPos;
|
|
1905
|
+
pos = nextPos;
|
|
1906
|
+
} else if (cc === 0x00) {
|
|
1907
|
+
// U+0000 NULL
|
|
1908
|
+
// This is an unexpected-null-character parse error. Ignore the character.
|
|
1909
|
+
pos++;
|
|
1910
|
+
} else {
|
|
1911
|
+
// Anything else
|
|
1912
|
+
// Ignore the character.
|
|
1913
|
+
pos++;
|
|
1914
|
+
}
|
|
1915
|
+
break;
|
|
1916
|
+
|
|
1917
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-state
|
|
1918
|
+
case STATE_CDATA_SECTION:
|
|
1919
|
+
// Consume the next input character:
|
|
1920
|
+
// U+005D RIGHT SQUARE BRACKET (])
|
|
1921
|
+
// Switch to the CDATA section bracket state.
|
|
1922
|
+
if (cc === CC_RIGHT_SQUARE_BRACKET) {
|
|
1923
|
+
state = STATE_CDATA_SECTION_BRACKET;
|
|
1924
|
+
pos++;
|
|
1925
|
+
} else {
|
|
1926
|
+
// Anything else
|
|
1927
|
+
// Emit the current input character as a character token.
|
|
1928
|
+
pos++;
|
|
1929
|
+
}
|
|
1930
|
+
break;
|
|
1931
|
+
|
|
1932
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-bracket-state
|
|
1933
|
+
case STATE_CDATA_SECTION_BRACKET:
|
|
1934
|
+
// Consume the next input character:
|
|
1935
|
+
// U+005D RIGHT SQUARE BRACKET (])
|
|
1936
|
+
// Switch to the CDATA section end state.
|
|
1937
|
+
if (cc === CC_RIGHT_SQUARE_BRACKET) {
|
|
1938
|
+
state = STATE_CDATA_SECTION_END;
|
|
1939
|
+
pos++;
|
|
1940
|
+
} else {
|
|
1941
|
+
// Anything else
|
|
1942
|
+
// Emit a U+005D RIGHT SQUARE BRACKET character token. Reconsume in the
|
|
1943
|
+
// CDATA section state.
|
|
1944
|
+
state = STATE_CDATA_SECTION;
|
|
1945
|
+
// Reconsume
|
|
1946
|
+
}
|
|
1947
|
+
break;
|
|
1948
|
+
|
|
1949
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-end-state
|
|
1950
|
+
case STATE_CDATA_SECTION_END:
|
|
1951
|
+
// Consume the next input character:
|
|
1952
|
+
// U+005D RIGHT SQUARE BRACKET (])
|
|
1953
|
+
// Emit a U+005D RIGHT SQUARE BRACKET character token.
|
|
1954
|
+
if (cc === CC_RIGHT_SQUARE_BRACKET) {
|
|
1955
|
+
pos++;
|
|
1956
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1957
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1958
|
+
// Switch to the data state.
|
|
1959
|
+
let nextPos = pos + 1;
|
|
1960
|
+
if (callbacks.comment !== undefined) {
|
|
1961
|
+
nextPos = callbacks.comment(input, commentStart, pos + 1);
|
|
1962
|
+
}
|
|
1963
|
+
state = STATE_DATA;
|
|
1964
|
+
textStart = nextPos;
|
|
1965
|
+
pos = nextPos;
|
|
1966
|
+
} else {
|
|
1967
|
+
// Anything else
|
|
1968
|
+
// Emit two U+005D RIGHT SQUARE BRACKET character tokens. Reconsume in the
|
|
1969
|
+
// CDATA section state.
|
|
1970
|
+
state = STATE_CDATA_SECTION;
|
|
1971
|
+
// Reconsume
|
|
1972
|
+
}
|
|
1973
|
+
break;
|
|
1974
|
+
|
|
1975
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state
|
|
1976
|
+
case STATE_RCDATA:
|
|
1977
|
+
// Consume the next input character:
|
|
1978
|
+
// U+003C LESS-THAN SIGN (<)
|
|
1979
|
+
// Switch to the RCDATA less-than sign state.
|
|
1980
|
+
if (cc === CC_LESS_THAN) {
|
|
1981
|
+
tagStart = pos;
|
|
1982
|
+
state = STATE_RCDATA_LESS_THAN_SIGN;
|
|
1983
|
+
pos++;
|
|
1984
|
+
} else {
|
|
1985
|
+
// Anything else
|
|
1986
|
+
// Emit the current input character as a character token.
|
|
1987
|
+
pos++;
|
|
1988
|
+
}
|
|
1989
|
+
break;
|
|
1990
|
+
|
|
1991
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-less-than-sign-state
|
|
1992
|
+
case STATE_RCDATA_LESS_THAN_SIGN:
|
|
1993
|
+
// Consume the next input character:
|
|
1994
|
+
// U+002F SOLIDUS (/)
|
|
1995
|
+
// Switch to the RCDATA end tag open state. (Spec sets a
|
|
1996
|
+
// temporary buffer here; we track the would-be content via
|
|
1997
|
+
// offset ranges instead.)
|
|
1998
|
+
if (cc === CC_SOLIDUS) {
|
|
1999
|
+
state = STATE_RCDATA_END_TAG_OPEN;
|
|
2000
|
+
pos++;
|
|
2001
|
+
} else {
|
|
2002
|
+
// Anything else
|
|
2003
|
+
// Emit a U+003C LESS-THAN SIGN character token. Reconsume in the RCDATA
|
|
2004
|
+
// state.
|
|
2005
|
+
state = STATE_RCDATA;
|
|
2006
|
+
// Reconsume
|
|
2007
|
+
}
|
|
2008
|
+
break;
|
|
2009
|
+
|
|
2010
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-end-tag-open-state
|
|
2011
|
+
case STATE_RCDATA_END_TAG_OPEN:
|
|
2012
|
+
// Consume the next input character:
|
|
2013
|
+
// ASCII alpha
|
|
2014
|
+
// Create a new end tag token, set its tag name to the empty string.
|
|
2015
|
+
// Reconsume in the RCDATA end tag name state.
|
|
2016
|
+
if (isAsciiAlpha(cc)) {
|
|
2017
|
+
tagNameStart = pos;
|
|
2018
|
+
state = STATE_RCDATA_END_TAG_NAME;
|
|
2019
|
+
// Reconsume
|
|
2020
|
+
} else {
|
|
2021
|
+
// Anything else
|
|
2022
|
+
// Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS
|
|
2023
|
+
// character token. Reconsume in the RCDATA state.
|
|
2024
|
+
state = STATE_RCDATA;
|
|
2025
|
+
// Reconsume
|
|
2026
|
+
}
|
|
2027
|
+
break;
|
|
2028
|
+
|
|
2029
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-end-tag-name-state
|
|
2030
|
+
case STATE_RCDATA_END_TAG_NAME:
|
|
2031
|
+
// Consume the next input character:
|
|
2032
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
2033
|
+
// U+000A LINE FEED (LF)
|
|
2034
|
+
// U+000C FORM FEED (FF)
|
|
2035
|
+
// U+0020 SPACE
|
|
2036
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
2037
|
+
// to the before attribute name state. Otherwise, treat it as per the
|
|
2038
|
+
// "anything else" entry below.
|
|
2039
|
+
if (isSpace(cc)) {
|
|
2040
|
+
tagNameEnd = pos;
|
|
2041
|
+
if (
|
|
2042
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
2043
|
+
lastOpenTagName
|
|
2044
|
+
) {
|
|
2045
|
+
flushText(tagStart);
|
|
2046
|
+
state = STATE_BEFORE_ATTRIBUTE_NAME;
|
|
2047
|
+
pos++;
|
|
2048
|
+
} else {
|
|
2049
|
+
state = STATE_RCDATA;
|
|
2050
|
+
// Reconsume
|
|
2051
|
+
}
|
|
2052
|
+
} else if (cc === CC_SOLIDUS) {
|
|
2053
|
+
// U+002F SOLIDUS (/)
|
|
2054
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
2055
|
+
// to the self-closing start tag state. Otherwise, treat it as per the
|
|
2056
|
+
// "anything else" entry below.
|
|
2057
|
+
tagNameEnd = pos;
|
|
2058
|
+
if (
|
|
2059
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
2060
|
+
lastOpenTagName
|
|
2061
|
+
) {
|
|
2062
|
+
flushText(tagStart);
|
|
2063
|
+
state = STATE_SELF_CLOSING_START_TAG;
|
|
2064
|
+
pos++;
|
|
2065
|
+
} else {
|
|
2066
|
+
state = STATE_RCDATA;
|
|
2067
|
+
// Reconsume
|
|
2068
|
+
}
|
|
2069
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
2070
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
2071
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
2072
|
+
// to the data state and emit the current tag token. Otherwise, treat it as
|
|
2073
|
+
// per the "anything else" entry below.
|
|
2074
|
+
tagNameEnd = pos;
|
|
2075
|
+
if (
|
|
2076
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
2077
|
+
lastOpenTagName
|
|
2078
|
+
) {
|
|
2079
|
+
flushText(tagStart);
|
|
2080
|
+
state = STATE_DATA;
|
|
2081
|
+
pos = emitCloseTag(pos + 1);
|
|
2082
|
+
} else {
|
|
2083
|
+
state = STATE_RCDATA;
|
|
2084
|
+
// Reconsume
|
|
2085
|
+
}
|
|
2086
|
+
} else if (isAsciiAlpha(cc)) {
|
|
2087
|
+
// ASCII upper alpha / ASCII lower alpha
|
|
2088
|
+
// Append the lowercase version of the current input character to the
|
|
2089
|
+
// current tag token's tag name. Append the current input character to
|
|
2090
|
+
// the temporary buffer.
|
|
2091
|
+
pos++;
|
|
2092
|
+
} else {
|
|
2093
|
+
// Anything else
|
|
2094
|
+
// Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
|
|
2095
|
+
// token, and a character token for each of the characters in the temporary
|
|
2096
|
+
// buffer (in the order they were added to the buffer). Reconsume in the
|
|
2097
|
+
// RCDATA state.
|
|
2098
|
+
state = STATE_RCDATA;
|
|
2099
|
+
// Reconsume
|
|
2100
|
+
}
|
|
2101
|
+
break;
|
|
2102
|
+
|
|
2103
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-state
|
|
2104
|
+
case STATE_RAWTEXT:
|
|
2105
|
+
// Consume the next input character:
|
|
2106
|
+
// U+003C LESS-THAN SIGN (<)
|
|
2107
|
+
// Switch to the RAWTEXT less-than sign state.
|
|
2108
|
+
if (cc === CC_LESS_THAN) {
|
|
2109
|
+
tagStart = pos;
|
|
2110
|
+
state = STATE_RAWTEXT_LESS_THAN_SIGN;
|
|
2111
|
+
pos++;
|
|
2112
|
+
} else {
|
|
2113
|
+
pos++;
|
|
2114
|
+
}
|
|
2115
|
+
break;
|
|
2116
|
+
|
|
2117
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-less-than-sign-state
|
|
2118
|
+
case STATE_RAWTEXT_LESS_THAN_SIGN:
|
|
2119
|
+
// Consume the next input character:
|
|
2120
|
+
// U+002F SOLIDUS (/)
|
|
2121
|
+
// Switch to the RAWTEXT end tag open state. (Spec sets a
|
|
2122
|
+
// temporary buffer here; we track via offset ranges instead.)
|
|
2123
|
+
if (cc === CC_SOLIDUS) {
|
|
2124
|
+
state = STATE_RAWTEXT_END_TAG_OPEN;
|
|
2125
|
+
pos++;
|
|
2126
|
+
} else {
|
|
2127
|
+
// Anything else
|
|
2128
|
+
// Emit a U+003C LESS-THAN SIGN character token. Reconsume in the RAWTEXT
|
|
2129
|
+
// state.
|
|
2130
|
+
state = STATE_RAWTEXT;
|
|
2131
|
+
// Reconsume
|
|
2132
|
+
}
|
|
2133
|
+
break;
|
|
2134
|
+
|
|
2135
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-end-tag-open-state
|
|
2136
|
+
case STATE_RAWTEXT_END_TAG_OPEN:
|
|
2137
|
+
// Consume the next input character:
|
|
2138
|
+
// ASCII alpha
|
|
2139
|
+
// Create a new end tag token, set its tag name to the empty string.
|
|
2140
|
+
// Reconsume in the RAWTEXT end tag name state.
|
|
2141
|
+
if (isAsciiAlpha(cc)) {
|
|
2142
|
+
tagNameStart = pos;
|
|
2143
|
+
state = STATE_RAWTEXT_END_TAG_NAME;
|
|
2144
|
+
// Reconsume
|
|
2145
|
+
} else {
|
|
2146
|
+
// Anything else
|
|
2147
|
+
// Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS
|
|
2148
|
+
// character token. Reconsume in the RAWTEXT state.
|
|
2149
|
+
state = STATE_RAWTEXT;
|
|
2150
|
+
// Reconsume
|
|
2151
|
+
}
|
|
2152
|
+
break;
|
|
2153
|
+
|
|
2154
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-end-tag-name-state
|
|
2155
|
+
case STATE_RAWTEXT_END_TAG_NAME:
|
|
2156
|
+
// Consume the next input character:
|
|
2157
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
2158
|
+
// U+000A LINE FEED (LF)
|
|
2159
|
+
// U+000C FORM FEED (FF)
|
|
2160
|
+
// U+0020 SPACE
|
|
2161
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
2162
|
+
// to the before attribute name state. Otherwise, treat it as per the
|
|
2163
|
+
// "anything else" entry below.
|
|
2164
|
+
if (isSpace(cc)) {
|
|
2165
|
+
tagNameEnd = pos;
|
|
2166
|
+
if (
|
|
2167
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
2168
|
+
lastOpenTagName
|
|
2169
|
+
) {
|
|
2170
|
+
flushText(tagStart);
|
|
2171
|
+
state = STATE_BEFORE_ATTRIBUTE_NAME;
|
|
2172
|
+
pos++;
|
|
2173
|
+
} else {
|
|
2174
|
+
state = STATE_RAWTEXT;
|
|
2175
|
+
}
|
|
2176
|
+
} else if (cc === CC_SOLIDUS) {
|
|
2177
|
+
// U+002F SOLIDUS (/)
|
|
2178
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
2179
|
+
// to the self-closing start tag state. Otherwise, treat it as per the
|
|
2180
|
+
// "anything else" entry below.
|
|
2181
|
+
tagNameEnd = pos;
|
|
2182
|
+
if (
|
|
2183
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
2184
|
+
lastOpenTagName
|
|
2185
|
+
) {
|
|
2186
|
+
flushText(tagStart);
|
|
2187
|
+
state = STATE_SELF_CLOSING_START_TAG;
|
|
2188
|
+
pos++;
|
|
2189
|
+
} else {
|
|
2190
|
+
state = STATE_RAWTEXT;
|
|
2191
|
+
}
|
|
2192
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
2193
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
2194
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
2195
|
+
// to the data state and emit the current tag token. Otherwise, treat it as
|
|
2196
|
+
// per the "anything else" entry below.
|
|
2197
|
+
tagNameEnd = pos;
|
|
2198
|
+
if (
|
|
2199
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
2200
|
+
lastOpenTagName
|
|
2201
|
+
) {
|
|
2202
|
+
flushText(tagStart);
|
|
2203
|
+
state = STATE_DATA;
|
|
2204
|
+
pos = emitCloseTag(pos + 1);
|
|
2205
|
+
} else {
|
|
2206
|
+
state = STATE_RAWTEXT;
|
|
2207
|
+
}
|
|
2208
|
+
} else if (isAsciiAlpha(cc)) {
|
|
2209
|
+
// ASCII upper alpha / ASCII lower alpha
|
|
2210
|
+
// Append the lowercase version of the current input character to the
|
|
2211
|
+
// current tag token's tag name. Append the current input character to
|
|
2212
|
+
// the temporary buffer.
|
|
2213
|
+
pos++;
|
|
2214
|
+
} else {
|
|
2215
|
+
// Anything else
|
|
2216
|
+
// Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
|
|
2217
|
+
// token, and a character token for each of the characters in the temporary
|
|
2218
|
+
// buffer (in the order they were added to the buffer). Reconsume in the
|
|
2219
|
+
// RAWTEXT state.
|
|
2220
|
+
state = STATE_RAWTEXT;
|
|
2221
|
+
// Reconsume
|
|
2222
|
+
}
|
|
2223
|
+
break;
|
|
2224
|
+
|
|
2225
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-state
|
|
2226
|
+
case STATE_SCRIPT_DATA:
|
|
2227
|
+
// Consume the next input character:
|
|
2228
|
+
// U+003C LESS-THAN SIGN (<)
|
|
2229
|
+
// Switch to the script data less-than sign state.
|
|
2230
|
+
if (cc === CC_LESS_THAN) {
|
|
2231
|
+
tagStart = pos;
|
|
2232
|
+
state = STATE_SCRIPT_DATA_LESS_THAN_SIGN;
|
|
2233
|
+
pos++;
|
|
2234
|
+
} else {
|
|
2235
|
+
pos++;
|
|
2236
|
+
}
|
|
2237
|
+
break;
|
|
2238
|
+
|
|
2239
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-less-than-sign-state
|
|
2240
|
+
case STATE_SCRIPT_DATA_LESS_THAN_SIGN:
|
|
2241
|
+
// Consume the next input character:
|
|
2242
|
+
// U+002F SOLIDUS (/)
|
|
2243
|
+
// Switch to the script data end tag open state. (Spec sets a
|
|
2244
|
+
// temporary buffer here; we track via offset ranges instead.)
|
|
2245
|
+
if (cc === CC_SOLIDUS) {
|
|
2246
|
+
state = STATE_SCRIPT_DATA_END_TAG_OPEN;
|
|
2247
|
+
pos++;
|
|
2248
|
+
} else if (cc === CC_EXCLAMATION_MARK) {
|
|
2249
|
+
// U+0021 EXCLAMATION MARK (!)
|
|
2250
|
+
// Switch to the script data escape start state. Emit a U+003C LESS-THAN
|
|
2251
|
+
// SIGN character token and a U+0021 EXCLAMATION MARK character token.
|
|
2252
|
+
state = STATE_SCRIPT_DATA_ESCAPE_START;
|
|
2253
|
+
pos++;
|
|
2254
|
+
} else {
|
|
2255
|
+
// Anything else
|
|
2256
|
+
// Emit a U+003C LESS-THAN SIGN character token. Reconsume in the script
|
|
2257
|
+
// data state.
|
|
2258
|
+
state = STATE_SCRIPT_DATA;
|
|
2259
|
+
// Reconsume
|
|
2260
|
+
}
|
|
2261
|
+
break;
|
|
2262
|
+
|
|
2263
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-end-tag-open-state
|
|
2264
|
+
case STATE_SCRIPT_DATA_END_TAG_OPEN:
|
|
2265
|
+
// Consume the next input character:
|
|
2266
|
+
// ASCII alpha
|
|
2267
|
+
// Create a new end tag token, set its tag name to the empty string.
|
|
2268
|
+
// Reconsume in the script data end tag name state.
|
|
2269
|
+
if (isAsciiAlpha(cc)) {
|
|
2270
|
+
tagNameStart = pos;
|
|
2271
|
+
state = STATE_SCRIPT_DATA_END_TAG_NAME;
|
|
2272
|
+
// Reconsume
|
|
2273
|
+
} else {
|
|
2274
|
+
// Anything else
|
|
2275
|
+
// Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS
|
|
2276
|
+
// character token. Reconsume in the script data state.
|
|
2277
|
+
state = STATE_SCRIPT_DATA;
|
|
2278
|
+
// Reconsume
|
|
2279
|
+
}
|
|
2280
|
+
break;
|
|
2281
|
+
|
|
2282
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-end-tag-name-state
|
|
2283
|
+
case STATE_SCRIPT_DATA_END_TAG_NAME:
|
|
2284
|
+
// Consume the next input character:
|
|
2285
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
2286
|
+
// U+000A LINE FEED (LF)
|
|
2287
|
+
// U+000C FORM FEED (FF)
|
|
2288
|
+
// U+0020 SPACE
|
|
2289
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
2290
|
+
// to the before attribute name state. Otherwise, treat it as per the
|
|
2291
|
+
// "anything else" entry below.
|
|
2292
|
+
if (isSpace(cc)) {
|
|
2293
|
+
tagNameEnd = pos;
|
|
2294
|
+
if (
|
|
2295
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
2296
|
+
lastOpenTagName
|
|
2297
|
+
) {
|
|
2298
|
+
flushText(tagStart);
|
|
2299
|
+
state = STATE_BEFORE_ATTRIBUTE_NAME;
|
|
2300
|
+
pos++;
|
|
2301
|
+
} else {
|
|
2302
|
+
state = STATE_SCRIPT_DATA;
|
|
2303
|
+
}
|
|
2304
|
+
} else if (cc === CC_SOLIDUS) {
|
|
2305
|
+
// U+002F SOLIDUS (/)
|
|
2306
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
2307
|
+
// to the self-closing start tag state. Otherwise, treat it as per the
|
|
2308
|
+
// "anything else" entry below.
|
|
2309
|
+
tagNameEnd = pos;
|
|
2310
|
+
if (
|
|
2311
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
2312
|
+
lastOpenTagName
|
|
2313
|
+
) {
|
|
2314
|
+
flushText(tagStart);
|
|
2315
|
+
state = STATE_SELF_CLOSING_START_TAG;
|
|
2316
|
+
pos++;
|
|
2317
|
+
} else {
|
|
2318
|
+
state = STATE_SCRIPT_DATA;
|
|
2319
|
+
}
|
|
2320
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
2321
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
2322
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
2323
|
+
// to the data state and emit the current tag token. Otherwise, treat it as
|
|
2324
|
+
// per the "anything else" entry below.
|
|
2325
|
+
tagNameEnd = pos;
|
|
2326
|
+
if (
|
|
2327
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
2328
|
+
lastOpenTagName
|
|
2329
|
+
) {
|
|
2330
|
+
flushText(tagStart);
|
|
2331
|
+
state = STATE_DATA;
|
|
2332
|
+
pos = emitCloseTag(pos + 1);
|
|
2333
|
+
} else {
|
|
2334
|
+
state = STATE_SCRIPT_DATA;
|
|
2335
|
+
}
|
|
2336
|
+
} else if (isAsciiAlpha(cc)) {
|
|
2337
|
+
// ASCII upper alpha / ASCII lower alpha
|
|
2338
|
+
pos++;
|
|
2339
|
+
} else {
|
|
2340
|
+
// Anything else
|
|
2341
|
+
// Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
|
|
2342
|
+
// token, and a character token for each of the characters in the temporary
|
|
2343
|
+
// buffer (in the order they were added to the buffer). Reconsume in the
|
|
2344
|
+
// script data state.
|
|
2345
|
+
state = STATE_SCRIPT_DATA;
|
|
2346
|
+
// Reconsume
|
|
2347
|
+
}
|
|
2348
|
+
break;
|
|
2349
|
+
|
|
2350
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escape-start-state
|
|
2351
|
+
case STATE_SCRIPT_DATA_ESCAPE_START:
|
|
2352
|
+
// Consume the next input character:
|
|
2353
|
+
// U+002D HYPHEN-MINUS (-)
|
|
2354
|
+
// Switch to the script data escape start dash state. Emit a U+002D
|
|
2355
|
+
// HYPHEN-MINUS character token.
|
|
2356
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
2357
|
+
state = STATE_SCRIPT_DATA_ESCAPE_START_DASH;
|
|
2358
|
+
pos++;
|
|
2359
|
+
} else {
|
|
2360
|
+
// Anything else
|
|
2361
|
+
// Reconsume in the script data state.
|
|
2362
|
+
state = STATE_SCRIPT_DATA;
|
|
2363
|
+
// Reconsume
|
|
2364
|
+
}
|
|
2365
|
+
break;
|
|
2366
|
+
|
|
2367
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escape-start-dash-state
|
|
2368
|
+
case STATE_SCRIPT_DATA_ESCAPE_START_DASH:
|
|
2369
|
+
// Consume the next input character:
|
|
2370
|
+
// U+002D HYPHEN-MINUS (-)
|
|
2371
|
+
// Switch to the script data escaped dash dash state. Emit a U+002D
|
|
2372
|
+
// HYPHEN-MINUS character token.
|
|
2373
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
2374
|
+
state = STATE_SCRIPT_DATA_ESCAPED_DASH_DASH;
|
|
2375
|
+
pos++;
|
|
2376
|
+
} else {
|
|
2377
|
+
// Anything else
|
|
2378
|
+
// Reconsume in the script data state.
|
|
2379
|
+
state = STATE_SCRIPT_DATA;
|
|
2380
|
+
// Reconsume
|
|
2381
|
+
}
|
|
2382
|
+
break;
|
|
2383
|
+
|
|
2384
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-state
|
|
2385
|
+
case STATE_SCRIPT_DATA_ESCAPED:
|
|
2386
|
+
// Consume the next input character:
|
|
2387
|
+
// U+002D HYPHEN-MINUS (-)
|
|
2388
|
+
// Switch to the script data escaped dash state. Emit a U+002D HYPHEN-MINUS
|
|
2389
|
+
// character token.
|
|
2390
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
2391
|
+
state = STATE_SCRIPT_DATA_ESCAPED_DASH;
|
|
2392
|
+
pos++;
|
|
2393
|
+
} else if (cc === CC_LESS_THAN) {
|
|
2394
|
+
// U+003C LESS-THAN SIGN (<)
|
|
2395
|
+
// Switch to the script data escaped less-than sign state.
|
|
2396
|
+
tagStart = pos;
|
|
2397
|
+
state = STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
|
|
2398
|
+
pos++;
|
|
2399
|
+
} else {
|
|
2400
|
+
// Anything else
|
|
2401
|
+
// Emit the current input character as a character token.
|
|
2402
|
+
pos++;
|
|
2403
|
+
}
|
|
2404
|
+
break;
|
|
2405
|
+
|
|
2406
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-dash-state
|
|
2407
|
+
case STATE_SCRIPT_DATA_ESCAPED_DASH:
|
|
2408
|
+
// Consume the next input character:
|
|
2409
|
+
// U+002D HYPHEN-MINUS (-)
|
|
2410
|
+
// Switch to the script data escaped dash dash state. Emit a U+002D
|
|
2411
|
+
// HYPHEN-MINUS character token.
|
|
2412
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
2413
|
+
state = STATE_SCRIPT_DATA_ESCAPED_DASH_DASH;
|
|
2414
|
+
pos++;
|
|
2415
|
+
} else if (cc === CC_LESS_THAN) {
|
|
2416
|
+
// U+003C LESS-THAN SIGN (<)
|
|
2417
|
+
// Switch to the script data escaped less-than sign state.
|
|
2418
|
+
tagStart = pos;
|
|
2419
|
+
state = STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
|
|
2420
|
+
pos++;
|
|
2421
|
+
} else {
|
|
2422
|
+
// Anything else
|
|
2423
|
+
// Switch to the script data escaped state. Emit the current input character
|
|
2424
|
+
// as a character token.
|
|
2425
|
+
state = STATE_SCRIPT_DATA_ESCAPED;
|
|
2426
|
+
pos++;
|
|
2427
|
+
}
|
|
2428
|
+
break;
|
|
2429
|
+
|
|
2430
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-dash-dash-state
|
|
2431
|
+
case STATE_SCRIPT_DATA_ESCAPED_DASH_DASH:
|
|
2432
|
+
// Consume the next input character:
|
|
2433
|
+
// U+002D HYPHEN-MINUS (-)
|
|
2434
|
+
// Emit a U+002D HYPHEN-MINUS character token.
|
|
2435
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
2436
|
+
pos++;
|
|
2437
|
+
} else if (cc === CC_LESS_THAN) {
|
|
2438
|
+
// U+003C LESS-THAN SIGN (<)
|
|
2439
|
+
// Switch to the script data escaped less-than sign state.
|
|
2440
|
+
tagStart = pos;
|
|
2441
|
+
state = STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
|
|
2442
|
+
pos++;
|
|
2443
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
2444
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
2445
|
+
// Switch to the script data state. Emit a U+003E GREATER-THAN SIGN
|
|
2446
|
+
// character token.
|
|
2447
|
+
state = STATE_SCRIPT_DATA;
|
|
2448
|
+
pos++;
|
|
2449
|
+
} else {
|
|
2450
|
+
// Anything else
|
|
2451
|
+
// Switch to the script data escaped state. Emit the current input character
|
|
2452
|
+
// as a character token.
|
|
2453
|
+
state = STATE_SCRIPT_DATA_ESCAPED;
|
|
2454
|
+
pos++;
|
|
2455
|
+
}
|
|
2456
|
+
break;
|
|
2457
|
+
|
|
2458
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-less-than-sign-state
|
|
2459
|
+
case STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
|
|
2460
|
+
// Consume the next input character:
|
|
2461
|
+
// U+002F SOLIDUS (/)
|
|
2462
|
+
// Switch to the script data escaped end tag open state.
|
|
2463
|
+
// (Spec sets a temporary buffer; we track via offset ranges.)
|
|
2464
|
+
if (cc === CC_SOLIDUS) {
|
|
2465
|
+
state = STATE_SCRIPT_DATA_ESCAPED_END_TAG_OPEN;
|
|
2466
|
+
pos++;
|
|
2467
|
+
} else if (isAsciiAlpha(cc)) {
|
|
2468
|
+
// ASCII alpha
|
|
2469
|
+
// Set the temporary buffer to the empty string. Emit a U+003C LESS-THAN
|
|
2470
|
+
// SIGN character token. Reconsume in the script data double escape start
|
|
2471
|
+
// state.
|
|
2472
|
+
scriptMatch = 0;
|
|
2473
|
+
state = STATE_SCRIPT_DATA_DOUBLE_ESCAPE_START;
|
|
2474
|
+
// Reconsume
|
|
2475
|
+
} else {
|
|
2476
|
+
// Anything else
|
|
2477
|
+
// Emit a U+003C LESS-THAN SIGN character token. Reconsume in the script
|
|
2478
|
+
// data escaped state.
|
|
2479
|
+
state = STATE_SCRIPT_DATA_ESCAPED;
|
|
2480
|
+
// Reconsume
|
|
2481
|
+
}
|
|
2482
|
+
break;
|
|
2483
|
+
|
|
2484
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-end-tag-open-state
|
|
2485
|
+
case STATE_SCRIPT_DATA_ESCAPED_END_TAG_OPEN:
|
|
2486
|
+
// Consume the next input character:
|
|
2487
|
+
// ASCII alpha
|
|
2488
|
+
// Create a new end tag token, set its tag name to the empty string.
|
|
2489
|
+
// Reconsume in the script data escaped end tag name state.
|
|
2490
|
+
if (isAsciiAlpha(cc)) {
|
|
2491
|
+
tagNameStart = pos;
|
|
2492
|
+
state = STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME;
|
|
2493
|
+
// Reconsume
|
|
2494
|
+
} else {
|
|
2495
|
+
// Anything else
|
|
2496
|
+
// Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS
|
|
2497
|
+
// character token. Reconsume in the script data escaped state.
|
|
2498
|
+
state = STATE_SCRIPT_DATA_ESCAPED;
|
|
2499
|
+
// Reconsume
|
|
2500
|
+
}
|
|
2501
|
+
break;
|
|
2502
|
+
|
|
2503
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-end-tag-name-state
|
|
2504
|
+
case STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME:
|
|
2505
|
+
// Consume the next input character:
|
|
2506
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
2507
|
+
// U+000A LINE FEED (LF)
|
|
2508
|
+
// U+000C FORM FEED (FF)
|
|
2509
|
+
// U+0020 SPACE
|
|
2510
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
2511
|
+
// to the before attribute name state. Otherwise, treat it as per the
|
|
2512
|
+
// "anything else" entry below.
|
|
2513
|
+
if (isSpace(cc)) {
|
|
2514
|
+
tagNameEnd = pos;
|
|
2515
|
+
if (
|
|
2516
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
2517
|
+
lastOpenTagName
|
|
2518
|
+
) {
|
|
2519
|
+
flushText(tagStart);
|
|
2520
|
+
state = STATE_BEFORE_ATTRIBUTE_NAME;
|
|
2521
|
+
pos++;
|
|
2522
|
+
} else {
|
|
2523
|
+
state = STATE_SCRIPT_DATA_ESCAPED;
|
|
2524
|
+
}
|
|
2525
|
+
} else if (cc === CC_SOLIDUS) {
|
|
2526
|
+
// U+002F SOLIDUS (/)
|
|
2527
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
2528
|
+
// to the self-closing start tag state. Otherwise, treat it as per the
|
|
2529
|
+
// "anything else" entry below.
|
|
2530
|
+
tagNameEnd = pos;
|
|
2531
|
+
if (
|
|
2532
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
2533
|
+
lastOpenTagName
|
|
2534
|
+
) {
|
|
2535
|
+
flushText(tagStart);
|
|
2536
|
+
state = STATE_SELF_CLOSING_START_TAG;
|
|
2537
|
+
pos++;
|
|
2538
|
+
} else {
|
|
2539
|
+
state = STATE_SCRIPT_DATA_ESCAPED;
|
|
2540
|
+
}
|
|
2541
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
2542
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
2543
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
2544
|
+
// to the data state and emit the current tag token. Otherwise, treat it as
|
|
2545
|
+
// per the "anything else" entry below.
|
|
2546
|
+
tagNameEnd = pos;
|
|
2547
|
+
if (
|
|
2548
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
2549
|
+
lastOpenTagName
|
|
2550
|
+
) {
|
|
2551
|
+
flushText(tagStart);
|
|
2552
|
+
state = STATE_DATA;
|
|
2553
|
+
pos = emitCloseTag(pos + 1);
|
|
2554
|
+
} else {
|
|
2555
|
+
state = STATE_SCRIPT_DATA_ESCAPED;
|
|
2556
|
+
}
|
|
2557
|
+
} else if (isAsciiAlpha(cc)) {
|
|
2558
|
+
// ASCII upper alpha / ASCII lower alpha
|
|
2559
|
+
pos++;
|
|
2560
|
+
} else {
|
|
2561
|
+
// Anything else
|
|
2562
|
+
// Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
|
|
2563
|
+
// token, and a character token for each of the characters in the temporary
|
|
2564
|
+
// buffer (in the order they were added to the buffer). Reconsume in the
|
|
2565
|
+
// script data escaped state.
|
|
2566
|
+
state = STATE_SCRIPT_DATA_ESCAPED;
|
|
2567
|
+
// Reconsume
|
|
2568
|
+
}
|
|
2569
|
+
break;
|
|
2570
|
+
|
|
2571
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-start-state
|
|
2572
|
+
case STATE_SCRIPT_DATA_DOUBLE_ESCAPE_START:
|
|
2573
|
+
// Consume the next input character:
|
|
2574
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
2575
|
+
// U+000A LINE FEED (LF)
|
|
2576
|
+
// U+000C FORM FEED (FF)
|
|
2577
|
+
// U+0020 SPACE
|
|
2578
|
+
// U+002F SOLIDUS (/)
|
|
2579
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
2580
|
+
// If the temporary buffer is the string "script", then switch to the script
|
|
2581
|
+
// data double escaped state. Otherwise, switch to the script data escaped
|
|
2582
|
+
// state. Emit the current input character as a character token.
|
|
2583
|
+
if (isSpace(cc) || cc === CC_SOLIDUS || cc === CC_GREATER_THAN) {
|
|
2584
|
+
state =
|
|
2585
|
+
scriptMatch === 6
|
|
2586
|
+
? STATE_SCRIPT_DATA_DOUBLE_ESCAPED
|
|
2587
|
+
: STATE_SCRIPT_DATA_ESCAPED;
|
|
2588
|
+
pos++;
|
|
2589
|
+
} else if (isAsciiUpperAlpha(cc) || isAsciiLowerAlpha(cc)) {
|
|
2590
|
+
// ASCII alpha — advance the `"script"` match counter if the
|
|
2591
|
+
// lowercase form matches the next expected char, otherwise
|
|
2592
|
+
// snap to the sentinel so further chars can't revive a
|
|
2593
|
+
// match. No buffer allocation.
|
|
2594
|
+
const lower = isAsciiUpperAlpha(cc) ? cc + 0x20 : cc;
|
|
2595
|
+
if (scriptMatch < 6 && lower === "script".charCodeAt(scriptMatch)) {
|
|
2596
|
+
scriptMatch++;
|
|
2597
|
+
} else {
|
|
2598
|
+
scriptMatch = 7;
|
|
2599
|
+
}
|
|
2600
|
+
pos++;
|
|
2601
|
+
} else {
|
|
2602
|
+
// Anything else
|
|
2603
|
+
// Reconsume in the script data escaped state.
|
|
2604
|
+
state = STATE_SCRIPT_DATA_ESCAPED;
|
|
2605
|
+
// Reconsume
|
|
2606
|
+
}
|
|
2607
|
+
break;
|
|
2608
|
+
|
|
2609
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-state
|
|
2610
|
+
case STATE_SCRIPT_DATA_DOUBLE_ESCAPED:
|
|
2611
|
+
// Consume the next input character:
|
|
2612
|
+
// U+002D HYPHEN-MINUS (-)
|
|
2613
|
+
// Switch to the script data double escaped dash state. Emit a U+002D
|
|
2614
|
+
// HYPHEN-MINUS character token.
|
|
2615
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
2616
|
+
state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH;
|
|
2617
|
+
pos++;
|
|
2618
|
+
} else if (cc === CC_LESS_THAN) {
|
|
2619
|
+
// U+003C LESS-THAN SIGN (<)
|
|
2620
|
+
// Switch to the script data double escaped less-than sign state. Emit a
|
|
2621
|
+
// U+003C LESS-THAN SIGN character token.
|
|
2622
|
+
state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
|
|
2623
|
+
pos++;
|
|
2624
|
+
} else {
|
|
2625
|
+
// Anything else
|
|
2626
|
+
// Emit the current input character as a character token.
|
|
2627
|
+
pos++;
|
|
2628
|
+
}
|
|
2629
|
+
break;
|
|
2630
|
+
|
|
2631
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-dash-state
|
|
2632
|
+
case STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
|
|
2633
|
+
// Consume the next input character:
|
|
2634
|
+
// U+002D HYPHEN-MINUS (-)
|
|
2635
|
+
// Switch to the script data double escaped dash dash state. Emit a U+002D
|
|
2636
|
+
// HYPHEN-MINUS character token.
|
|
2637
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
2638
|
+
state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH;
|
|
2639
|
+
pos++;
|
|
2640
|
+
} else if (cc === CC_LESS_THAN) {
|
|
2641
|
+
// U+003C LESS-THAN SIGN (<)
|
|
2642
|
+
// Switch to the script data double escaped less-than sign state. Emit a
|
|
2643
|
+
// U+003C LESS-THAN SIGN character token.
|
|
2644
|
+
state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
|
|
2645
|
+
pos++;
|
|
2646
|
+
} else {
|
|
2647
|
+
// Anything else
|
|
2648
|
+
// Switch to the script data double escaped state. Emit the current input
|
|
2649
|
+
// character as a character token.
|
|
2650
|
+
state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED;
|
|
2651
|
+
pos++;
|
|
2652
|
+
}
|
|
2653
|
+
break;
|
|
2654
|
+
|
|
2655
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-dash-dash-state
|
|
2656
|
+
case STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
|
|
2657
|
+
// Consume the next input character:
|
|
2658
|
+
// U+002D HYPHEN-MINUS (-)
|
|
2659
|
+
// Emit a U+002D HYPHEN-MINUS character token.
|
|
2660
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
2661
|
+
pos++;
|
|
2662
|
+
} else if (cc === CC_LESS_THAN) {
|
|
2663
|
+
// U+003C LESS-THAN SIGN (<)
|
|
2664
|
+
// Switch to the script data double escaped less-than sign state. Emit a
|
|
2665
|
+
// U+003C LESS-THAN SIGN character token.
|
|
2666
|
+
state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
|
|
2667
|
+
pos++;
|
|
2668
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
2669
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
2670
|
+
// Switch to the script data state. Emit a U+003E GREATER-THAN SIGN
|
|
2671
|
+
// character token.
|
|
2672
|
+
state = STATE_SCRIPT_DATA;
|
|
2673
|
+
pos++;
|
|
2674
|
+
} else {
|
|
2675
|
+
// Anything else
|
|
2676
|
+
// Switch to the script data double escaped state. Emit the current input
|
|
2677
|
+
// character as a character token.
|
|
2678
|
+
state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED;
|
|
2679
|
+
pos++;
|
|
2680
|
+
}
|
|
2681
|
+
break;
|
|
2682
|
+
|
|
2683
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-less-than-sign-state
|
|
2684
|
+
case STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
|
|
2685
|
+
// Consume the next input character:
|
|
2686
|
+
// U+002F SOLIDUS (/)
|
|
2687
|
+
// Set the temporary buffer to the empty string. Switch to the script data
|
|
2688
|
+
// double escape end state. Emit a U+002F SOLIDUS character token.
|
|
2689
|
+
if (cc === CC_SOLIDUS) {
|
|
2690
|
+
scriptMatch = 0;
|
|
2691
|
+
state = STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END;
|
|
2692
|
+
pos++;
|
|
2693
|
+
} else {
|
|
2694
|
+
// Anything else
|
|
2695
|
+
// Reconsume in the script data double escaped state.
|
|
2696
|
+
state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED;
|
|
2697
|
+
// Reconsume
|
|
2698
|
+
}
|
|
2699
|
+
break;
|
|
2700
|
+
|
|
2701
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-end-state
|
|
2702
|
+
case STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END:
|
|
2703
|
+
// Consume the next input character:
|
|
2704
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
2705
|
+
// U+000A LINE FEED (LF)
|
|
2706
|
+
// U+000C FORM FEED (FF)
|
|
2707
|
+
// U+0020 SPACE
|
|
2708
|
+
// U+002F SOLIDUS (/)
|
|
2709
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
2710
|
+
// If the temporary buffer is the string "script", then switch to the script
|
|
2711
|
+
// data escaped state. Otherwise, switch to the script data double escaped
|
|
2712
|
+
// state. Emit the current input character as a character token.
|
|
2713
|
+
if (isSpace(cc) || cc === CC_SOLIDUS || cc === CC_GREATER_THAN) {
|
|
2714
|
+
state =
|
|
2715
|
+
scriptMatch === 6
|
|
2716
|
+
? STATE_SCRIPT_DATA_ESCAPED
|
|
2717
|
+
: STATE_SCRIPT_DATA_DOUBLE_ESCAPED;
|
|
2718
|
+
pos++;
|
|
2719
|
+
} else if (isAsciiUpperAlpha(cc) || isAsciiLowerAlpha(cc)) {
|
|
2720
|
+
// ASCII alpha — advance the `"script"` match counter if the
|
|
2721
|
+
// lowercase form matches the next expected char, otherwise
|
|
2722
|
+
// snap to the sentinel so further chars can't revive a
|
|
2723
|
+
// match. No buffer allocation.
|
|
2724
|
+
const lower = isAsciiUpperAlpha(cc) ? cc + 0x20 : cc;
|
|
2725
|
+
if (scriptMatch < 6 && lower === "script".charCodeAt(scriptMatch)) {
|
|
2726
|
+
scriptMatch++;
|
|
2727
|
+
} else {
|
|
2728
|
+
scriptMatch = 7;
|
|
2729
|
+
}
|
|
2730
|
+
pos++;
|
|
2731
|
+
} else {
|
|
2732
|
+
// Anything else
|
|
2733
|
+
// Reconsume in the script data double escaped state.
|
|
2734
|
+
state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED;
|
|
2735
|
+
// Reconsume
|
|
2736
|
+
}
|
|
2737
|
+
break;
|
|
2738
|
+
|
|
2739
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#plaintext-state
|
|
2740
|
+
case STATE_PLAINTEXT:
|
|
2741
|
+
// Consume the next input character:
|
|
2742
|
+
// Anything else
|
|
2743
|
+
// Emit the current input character as a character token.
|
|
2744
|
+
pos++;
|
|
2745
|
+
break;
|
|
2746
|
+
|
|
2747
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#character-reference-state
|
|
2748
|
+
case STATE_CHARACTER_REFERENCE:
|
|
2749
|
+
// Set the temporary buffer to the empty string. Append a U+0026
|
|
2750
|
+
// AMPERSAND (&) character to the temporary buffer.
|
|
2751
|
+
// Consume the next input character:
|
|
2752
|
+
if (isAsciiAlphanumeric(cc)) {
|
|
2753
|
+
// ASCII alphanumeric
|
|
2754
|
+
// Reconsume in the named character reference state.
|
|
2755
|
+
state = STATE_NAMED_CHARACTER_REFERENCE;
|
|
2756
|
+
// Reconsume
|
|
2757
|
+
} else if (cc === CC_NUMBER_SIGN) {
|
|
2758
|
+
// U+0023 NUMBER SIGN (#)
|
|
2759
|
+
// Append the current input character to the temporary buffer.
|
|
2760
|
+
// Switch to the numeric character reference state.
|
|
2761
|
+
state = STATE_NUMERIC_CHARACTER_REFERENCE;
|
|
2762
|
+
pos++;
|
|
2763
|
+
} else {
|
|
2764
|
+
// Anything else
|
|
2765
|
+
// Flush code points consumed as a character reference.
|
|
2766
|
+
// Reconsume in the return state.
|
|
2767
|
+
state = returnState;
|
|
2768
|
+
// Reconsume
|
|
2769
|
+
}
|
|
2770
|
+
break;
|
|
2771
|
+
|
|
2772
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
|
|
2773
|
+
case STATE_NAMED_CHARACTER_REFERENCE: {
|
|
2774
|
+
// Consume the maximum number of characters possible where the
|
|
2775
|
+
// consumed characters are one of the identifiers in the first
|
|
2776
|
+
// column of the named character references table.
|
|
2777
|
+
//
|
|
2778
|
+
// We measure the longest run of ASCII alphanumeric characters
|
|
2779
|
+
// (capped at MAX_ENTITY_NAME_LEN - 1 since the optional `;` is
|
|
2780
|
+
// handled separately), then walk that run from longest to
|
|
2781
|
+
// shortest looking for the first prefix that exists in the
|
|
2782
|
+
// entity table (with a trailing `;` if present, otherwise the
|
|
2783
|
+
// legacy bare form).
|
|
2784
|
+
let runLen = 0;
|
|
2785
|
+
while (
|
|
2786
|
+
pos + runLen < len &&
|
|
2787
|
+
isAsciiAlphanumeric(input.charCodeAt(pos + runLen)) &&
|
|
2788
|
+
runLen < MAX_ENTITY_NAME_LEN - 1
|
|
2789
|
+
) {
|
|
2790
|
+
runLen++;
|
|
2791
|
+
}
|
|
2792
|
+
const hasSemicolon =
|
|
2793
|
+
pos + runLen < len && input.charCodeAt(pos + runLen) === CC_SEMICOLON;
|
|
2794
|
+
namedEntityConsumed = 0;
|
|
2795
|
+
for (let n = runLen; n > 0; n--) {
|
|
2796
|
+
// Try with trailing `;` first if one is present after the run.
|
|
2797
|
+
if (n === runLen && hasSemicolon) {
|
|
2798
|
+
const withSemi = `${input.slice(pos, pos + n)};`;
|
|
2799
|
+
if (HTML_ENTITIES[withSemi] !== undefined) {
|
|
2800
|
+
namedEntityConsumed = n + 1;
|
|
2801
|
+
break;
|
|
2802
|
+
}
|
|
2803
|
+
}
|
|
2804
|
+
const bare = input.slice(pos, pos + n);
|
|
2805
|
+
if (HTML_ENTITIES[bare] !== undefined) {
|
|
2806
|
+
namedEntityConsumed = n;
|
|
2807
|
+
break;
|
|
2808
|
+
}
|
|
2809
|
+
}
|
|
2810
|
+
if (namedEntityConsumed > 0) {
|
|
2811
|
+
pos += namedEntityConsumed;
|
|
2812
|
+
state = returnState;
|
|
2813
|
+
} else {
|
|
2814
|
+
// No match — flush code points consumed as a character
|
|
2815
|
+
// reference. Switch to the ambiguous ampersand state.
|
|
2816
|
+
state = STATE_AMBIGUOUS_AMPERSAND;
|
|
2817
|
+
}
|
|
2818
|
+
break;
|
|
2819
|
+
}
|
|
2820
|
+
|
|
2821
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#ambiguous-ampersand-state
|
|
2822
|
+
case STATE_AMBIGUOUS_AMPERSAND:
|
|
2823
|
+
// Consume the next input character:
|
|
2824
|
+
if (isAsciiAlphanumeric(cc)) {
|
|
2825
|
+
// ASCII alphanumeric
|
|
2826
|
+
// If the character reference was consumed as part of an
|
|
2827
|
+
// attribute, then append the current input character to the
|
|
2828
|
+
// current attribute's value. Otherwise, emit the current
|
|
2829
|
+
// input character as a character token.
|
|
2830
|
+
pos++;
|
|
2831
|
+
} else if (cc === CC_SEMICOLON) {
|
|
2832
|
+
// U+003B SEMICOLON (;)
|
|
2833
|
+
// This is an unknown-named-character-reference parse error.
|
|
2834
|
+
// Reconsume in the return state.
|
|
2835
|
+
reportError(
|
|
2836
|
+
"unknown-named-character-reference",
|
|
2837
|
+
pos,
|
|
2838
|
+
pos + 1,
|
|
2839
|
+
"warning"
|
|
2840
|
+
);
|
|
2841
|
+
state = returnState;
|
|
2842
|
+
// Reconsume
|
|
2843
|
+
} else {
|
|
2844
|
+
// Anything else
|
|
2845
|
+
// Reconsume in the return state.
|
|
2846
|
+
state = returnState;
|
|
2847
|
+
// Reconsume
|
|
2848
|
+
}
|
|
2849
|
+
break;
|
|
2850
|
+
|
|
2851
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-state
|
|
2852
|
+
case STATE_NUMERIC_CHARACTER_REFERENCE:
|
|
2853
|
+
// Set the character reference code to zero (0).
|
|
2854
|
+
// Consume the next input character:
|
|
2855
|
+
if (cc === 0x78 || cc === 0x58) {
|
|
2856
|
+
// U+0078 LATIN SMALL LETTER X
|
|
2857
|
+
// U+0058 LATIN CAPITAL LETTER X
|
|
2858
|
+
// Append the current input character to the temporary
|
|
2859
|
+
// buffer. Switch to the hexadecimal character reference
|
|
2860
|
+
// start state.
|
|
2861
|
+
state = STATE_HEXADECIMAL_CHARACTER_REFERENCE_START;
|
|
2862
|
+
pos++;
|
|
2863
|
+
} else {
|
|
2864
|
+
// Anything else
|
|
2865
|
+
// Reconsume in the decimal character reference start state.
|
|
2866
|
+
state = STATE_DECIMAL_CHARACTER_REFERENCE_START;
|
|
2867
|
+
// Reconsume
|
|
2868
|
+
}
|
|
2869
|
+
break;
|
|
2870
|
+
|
|
2871
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#hexadecimal-character-reference-start-state
|
|
2872
|
+
case STATE_HEXADECIMAL_CHARACTER_REFERENCE_START:
|
|
2873
|
+
// Consume the next input character:
|
|
2874
|
+
// ASCII hex digit: reconsume in the hexadecimal character reference state.
|
|
2875
|
+
// Anything else: absence-of-digits-in-numeric-character-reference parse
|
|
2876
|
+
// error. Flush code points consumed as a character reference. Reconsume
|
|
2877
|
+
// in the return state.
|
|
2878
|
+
if (isAsciiHexDigit(cc)) {
|
|
2879
|
+
state = STATE_HEXADECIMAL_CHARACTER_REFERENCE;
|
|
2880
|
+
} else {
|
|
2881
|
+
reportError(
|
|
2882
|
+
"absence-of-digits-in-numeric-character-reference",
|
|
2883
|
+
pos,
|
|
2884
|
+
pos + 1,
|
|
2885
|
+
"warning"
|
|
2886
|
+
);
|
|
2887
|
+
state = returnState;
|
|
2888
|
+
}
|
|
2889
|
+
// Reconsume
|
|
2890
|
+
break;
|
|
2891
|
+
|
|
2892
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#decimal-character-reference-start-state
|
|
2893
|
+
case STATE_DECIMAL_CHARACTER_REFERENCE_START:
|
|
2894
|
+
// Consume the next input character:
|
|
2895
|
+
// ASCII digit: reconsume in the decimal character reference state.
|
|
2896
|
+
// Anything else: absence-of-digits-in-numeric-character-reference parse
|
|
2897
|
+
// error. Flush code points consumed as a character reference. Reconsume
|
|
2898
|
+
// in the return state.
|
|
2899
|
+
if (isAsciiDigit(cc)) {
|
|
2900
|
+
state = STATE_DECIMAL_CHARACTER_REFERENCE;
|
|
2901
|
+
} else {
|
|
2902
|
+
reportError(
|
|
2903
|
+
"absence-of-digits-in-numeric-character-reference",
|
|
2904
|
+
pos,
|
|
2905
|
+
pos + 1,
|
|
2906
|
+
"warning"
|
|
2907
|
+
);
|
|
2908
|
+
state = returnState;
|
|
2909
|
+
}
|
|
2910
|
+
// Reconsume
|
|
2911
|
+
break;
|
|
2912
|
+
|
|
2913
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#hexadecimal-character-reference-state
|
|
2914
|
+
case STATE_HEXADECIMAL_CHARACTER_REFERENCE:
|
|
2915
|
+
// Consume the next input character:
|
|
2916
|
+
if (isAsciiHexDigit(cc)) {
|
|
2917
|
+
// ASCII digit / upper hex / lower hex
|
|
2918
|
+
// Multiply the character reference code by 16. Add a numeric
|
|
2919
|
+
// version of the current input character to the character
|
|
2920
|
+
// reference code.
|
|
2921
|
+
pos++;
|
|
2922
|
+
} else if (cc === CC_SEMICOLON) {
|
|
2923
|
+
// U+003B SEMICOLON
|
|
2924
|
+
// Switch to the numeric character reference end state.
|
|
2925
|
+
state = STATE_NUMERIC_CHARACTER_REFERENCE_END;
|
|
2926
|
+
pos++;
|
|
2927
|
+
} else {
|
|
2928
|
+
// Anything else
|
|
2929
|
+
// This is a missing-semicolon-after-character-reference
|
|
2930
|
+
// parse error. Reconsume in the numeric character reference
|
|
2931
|
+
// end state.
|
|
2932
|
+
reportError(
|
|
2933
|
+
"missing-semicolon-after-character-reference",
|
|
2934
|
+
pos,
|
|
2935
|
+
pos + 1,
|
|
2936
|
+
"warning"
|
|
2937
|
+
);
|
|
2938
|
+
state = STATE_NUMERIC_CHARACTER_REFERENCE_END;
|
|
2939
|
+
// Reconsume
|
|
2940
|
+
}
|
|
2941
|
+
break;
|
|
2942
|
+
|
|
2943
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#decimal-character-reference-state
|
|
2944
|
+
case STATE_DECIMAL_CHARACTER_REFERENCE:
|
|
2945
|
+
// Consume the next input character:
|
|
2946
|
+
if (isAsciiDigit(cc)) {
|
|
2947
|
+
// ASCII digit
|
|
2948
|
+
// Multiply the character reference code by 10. Add a numeric
|
|
2949
|
+
// version of the current input character (subtract 0x0030
|
|
2950
|
+
// from the character's code point) to the character reference
|
|
2951
|
+
// code.
|
|
2952
|
+
pos++;
|
|
2953
|
+
} else if (cc === CC_SEMICOLON) {
|
|
2954
|
+
// U+003B SEMICOLON
|
|
2955
|
+
// Switch to the numeric character reference end state.
|
|
2956
|
+
state = STATE_NUMERIC_CHARACTER_REFERENCE_END;
|
|
2957
|
+
pos++;
|
|
2958
|
+
} else {
|
|
2959
|
+
// Anything else
|
|
2960
|
+
// This is a missing-semicolon-after-character-reference
|
|
2961
|
+
// parse error. Reconsume in the numeric character reference
|
|
2962
|
+
// end state.
|
|
2963
|
+
reportError(
|
|
2964
|
+
"missing-semicolon-after-character-reference",
|
|
2965
|
+
pos,
|
|
2966
|
+
pos + 1,
|
|
2967
|
+
"warning"
|
|
2968
|
+
);
|
|
2969
|
+
state = STATE_NUMERIC_CHARACTER_REFERENCE_END;
|
|
2970
|
+
// Reconsume
|
|
2971
|
+
}
|
|
2972
|
+
break;
|
|
2973
|
+
|
|
2974
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
|
|
2975
|
+
case STATE_NUMERIC_CHARACTER_REFERENCE_END:
|
|
2976
|
+
// Check the character reference code (validation omitted for
|
|
2977
|
+
// the scanner — we don't decode, just skip past the entity).
|
|
2978
|
+
// Flush code points consumed as a character reference.
|
|
2979
|
+
// Switch to the return state.
|
|
2980
|
+
state = returnState;
|
|
2981
|
+
// Reconsume
|
|
2982
|
+
break;
|
|
2983
|
+
|
|
2984
|
+
/* istanbul ignore next -- @preserve: defensive fallback, all states are explicit above */
|
|
2985
|
+
default:
|
|
2986
|
+
pos++;
|
|
2987
|
+
}
|
|
2988
|
+
}
|
|
2989
|
+
|
|
2990
|
+
// Handle EOF in non-data states per the WHATWG spec.
|
|
2991
|
+
//
|
|
2992
|
+
// Each in-progress comment / doctype / cdata / tag emits its partial
|
|
2993
|
+
// token range plus a corresponding `eof-in-X` parse error. Severity is
|
|
2994
|
+
// `"error"` because the emitted token offset range is incomplete (missing
|
|
2995
|
+
// trailing `-->`, `>`, `]]>`, etc.). For data / `<` / `</` / `<!`-only
|
|
2996
|
+
// inputs we emit `eof-before-tag-name` and fall through to flush the
|
|
2997
|
+
// pending text span (which still contains the lone `<`).
|
|
2998
|
+
// If EOF caught us inside a character-reference state, flush whatever the
|
|
2999
|
+
// scanner had consumed and resume in the return state so any in-progress
|
|
3000
|
+
// tag/comment is handled correctly by the branches below.
|
|
3001
|
+
if (
|
|
3002
|
+
state >= STATE_CHARACTER_REFERENCE &&
|
|
3003
|
+
state <= STATE_NUMERIC_CHARACTER_REFERENCE_END
|
|
3004
|
+
) {
|
|
3005
|
+
state = returnState;
|
|
3006
|
+
}
|
|
3007
|
+
|
|
3008
|
+
if (
|
|
3009
|
+
(state >= STATE_TAG_NAME && state <= STATE_SELF_CLOSING_START_TAG) ||
|
|
3010
|
+
state === STATE_RCDATA_END_TAG_NAME ||
|
|
3011
|
+
state === STATE_RAWTEXT_END_TAG_NAME ||
|
|
3012
|
+
state === STATE_SCRIPT_DATA_END_TAG_NAME ||
|
|
3013
|
+
state === STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME
|
|
3014
|
+
) {
|
|
3015
|
+
// EOF mid-tag — emit the partial open/close tag at EOF so the
|
|
3016
|
+
// consumer still sees the tag. This is a deliberate deviation
|
|
3017
|
+
// from the spec's per-character emission model: rather than
|
|
3018
|
+
// dropping the in-progress tag, we emit its offset range up to EOF.
|
|
3019
|
+
reportError("eof-in-tag", len, len, "error");
|
|
3020
|
+
// If we hit EOF mid-attribute-name, the name runs to EOF. Set
|
|
3021
|
+
// attrNameEnd here so the emitted attribute range is valid.
|
|
3022
|
+
if (state === STATE_ATTRIBUTE_NAME && attrNameStart !== -1) {
|
|
3023
|
+
attrNameEnd = len;
|
|
3024
|
+
}
|
|
3025
|
+
if (attrNameStart !== -1) emitAttribute(len);
|
|
3026
|
+
// If we hit EOF before the tag-name end was recorded, the name runs
|
|
3027
|
+
// to EOF. `tagNameEnd` may carry over from a previously emitted tag,
|
|
3028
|
+
// so reset it whenever it's missing or stale (less than `tagNameStart`)
|
|
3029
|
+
// — covers `<div` open-tag EOFs as well as `<title>x</tit` and other
|
|
3030
|
+
// content-mode end-tag-name EOFs.
|
|
3031
|
+
if (tagNameStart !== -1 && tagNameEnd < tagNameStart) {
|
|
3032
|
+
tagNameEnd = len;
|
|
3033
|
+
}
|
|
3034
|
+
flushText(tagStart);
|
|
3035
|
+
pos =
|
|
3036
|
+
input.charCodeAt(tagStart + 1) === CC_SOLIDUS
|
|
3037
|
+
? emitCloseTag(len)
|
|
3038
|
+
: emitOpenTag(len, false);
|
|
3039
|
+
} else if (
|
|
3040
|
+
(state >= STATE_COMMENT_START && state <= STATE_BOGUS_COMMENT) ||
|
|
3041
|
+
(state >= STATE_COMMENT_LESS_THAN_SIGN &&
|
|
3042
|
+
state <= STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH) ||
|
|
3043
|
+
state === STATE_MARKUP_DECLARATION_OPEN
|
|
3044
|
+
) {
|
|
3045
|
+
// Bogus comments at EOF are normal per spec (no parse error).
|
|
3046
|
+
if (state !== STATE_BOGUS_COMMENT) {
|
|
3047
|
+
reportError("eof-in-comment", len, len, "error");
|
|
3048
|
+
}
|
|
3049
|
+
if (callbacks.comment !== undefined) {
|
|
3050
|
+
pos = callbacks.comment(input, commentStart, len);
|
|
3051
|
+
}
|
|
3052
|
+
} else if (state >= STATE_CDATA_SECTION && state <= STATE_CDATA_SECTION_END) {
|
|
3053
|
+
reportError("eof-in-cdata", len, len, "error");
|
|
3054
|
+
if (callbacks.comment !== undefined) {
|
|
3055
|
+
pos = callbacks.comment(input, commentStart, len);
|
|
3056
|
+
}
|
|
3057
|
+
} else if (state >= STATE_DOCTYPE && state <= STATE_BOGUS_DOCTYPE) {
|
|
3058
|
+
reportError("eof-in-doctype", len, len, "error");
|
|
3059
|
+
if (callbacks.doctype !== undefined) {
|
|
3060
|
+
pos = callbacks.doctype(input, commentStart, len);
|
|
3061
|
+
}
|
|
3062
|
+
} else {
|
|
3063
|
+
if (
|
|
3064
|
+
state === STATE_SCRIPT_DATA_ESCAPED ||
|
|
3065
|
+
state === STATE_SCRIPT_DATA_ESCAPED_DASH ||
|
|
3066
|
+
state === STATE_SCRIPT_DATA_ESCAPED_DASH_DASH ||
|
|
3067
|
+
state === STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN ||
|
|
3068
|
+
state === STATE_SCRIPT_DATA_DOUBLE_ESCAPED ||
|
|
3069
|
+
state === STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH ||
|
|
3070
|
+
state === STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH ||
|
|
3071
|
+
state === STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN ||
|
|
3072
|
+
state === STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END
|
|
3073
|
+
) {
|
|
3074
|
+
// Inside `<script><!-- … ` at EOF — spec calls this an
|
|
3075
|
+
// eof-in-script-html-comment-like-text parse error. The
|
|
3076
|
+
// less-than-sign and double-escape-end states reconsume back
|
|
3077
|
+
// into the (double-)escaped state on EOF per spec, which then
|
|
3078
|
+
// hits this same error.
|
|
3079
|
+
reportError("eof-in-script-html-comment-like-text", len, len, "error");
|
|
3080
|
+
} else if (state === STATE_TAG_OPEN || state === STATE_END_TAG_OPEN) {
|
|
3081
|
+
// `<` or `</` with nothing after; spec calls this
|
|
3082
|
+
// eof-before-tag-name. The lone `<` / `</` is preserved in the
|
|
3083
|
+
// pending text span which is flushed below.
|
|
3084
|
+
reportError("eof-before-tag-name", len, len, "warning");
|
|
3085
|
+
}
|
|
3086
|
+
if (textStart < len && callbacks.text !== undefined) {
|
|
3087
|
+
callbacks.text(input, textStart, len);
|
|
3088
|
+
}
|
|
3089
|
+
}
|
|
3090
|
+
|
|
3091
|
+
return pos;
|
|
3092
|
+
};
|
|
3093
|
+
|
|
3094
|
+
// Attach the QUOTE_* constants (declared earlier in this file) as properties
// of `walkHtmlTokens`. The function itself is the module's export, so these
// properties are how the constants become reachable from outside the module.
// NOTE(review): presumably these identify how an attribute value was quoted
// (unquoted / single / double) — confirm against the attribute callback.
walkHtmlTokens.QUOTE_NONE = QUOTE_NONE;
walkHtmlTokens.QUOTE_SINGLE = QUOTE_SINGLE;
walkHtmlTokens.QUOTE_DOUBLE = QUOTE_DOUBLE;
|
|
3097
|
+
|
|
3098
|
+
// Replacement table used when a numeric character reference names a code
// point in the C1 control range (0x80-0x9F). The WHATWG
// numeric-character-reference-end state maps these to the glyph found at the
// same byte position in Windows-1252 instead of yielding the raw control
// character. Positions that Windows-1252 leaves unassigned (0x81, 0x8D,
// 0x8F, 0x90, 0x9D) are intentionally absent.
const NUMERIC_C1_REMAP = {
	0x80: "€",
	0x82: "‚",
	0x83: "ƒ",
	0x84: "„",
	0x85: "…",
	0x86: "†",
	0x87: "‡",
	0x88: "ˆ",
	0x89: "‰",
	0x8A: "Š",
	0x8B: "‹",
	0x8C: "Œ",
	0x8E: "Ž",
	0x91: "‘",
	0x92: "’",
	0x93: "“",
	0x94: "”",
	0x95: "•",
	0x96: "–",
	0x97: "—",
	0x98: "˜",
	0x99: "™",
	0x9A: "š",
	0x9B: "›",
	0x9C: "œ",
	0x9E: "ž",
	0x9F: "Ÿ"
};

/**
 * Turn the numeric value of a character reference into the text it stands
 * for, following the WHATWG numeric-character-reference-end rules:
 * - `0`, values above `0x10FFFF` and surrogates (`0xD800`-`0xDFFF`) become
 * U+FFFD REPLACEMENT CHARACTER;
 * - values in `0x80`-`0x9F` that have an entry in `NUMERIC_C1_REMAP` become
 * that entry;
 * - every other value (noncharacters and C0 controls included) becomes the
 * code point itself. No parse errors are reported from here.
 * @param {number} code numeric character reference code point
 * @returns {string} decoded character per WHATWG remap rules
 */
const decodeNumericReference = (code) => {
	const isNull = code === 0;
	const isTooLarge = code > 0x10ffff;
	const isSurrogate = code >= 0xd800 && code <= 0xdfff;
	if (isNull || isTooLarge || isSurrogate) {
		return "�";
	}

	// Only consult the table for the C1 range; unassigned slots fall through
	// to the plain code point below.
	const isC1Control = code >= 0x80 && code <= 0x9f;
	const replacement = isC1Control
		? /** @type {Record<number, string>} */ (NUMERIC_C1_REMAP)[code]
		: undefined;

	return replacement === undefined ? String.fromCodePoint(code) : replacement;
};
|
|
3154
|
+
|
|
3155
|
+
/**
 * Decode HTML character references in a string. Handles all numeric
 * references (with WHATWG remap of 0x00, surrogates, out-of-range, and the
 * C1 Windows-1252 table) and the named references listed in
 * `HTML_ENTITIES`. Unknown or malformed references are left as literal text.
 *
 * When `isAttribute` is `true`, applies the WHATWG
 * "consumed-as-part-of-an-attribute" rule: a named reference without a
 * trailing `;` whose next character is `=` or ASCII alphanumeric is left
 * undecoded, so e.g. `&amp=foo` stays literal in an attribute value but
 * decodes to `&=foo` in text.
 * @param {string} str the raw string from the token slice
 * @param {boolean=} isAttribute true if `str` came from an attribute value
 * @returns {string} decoded string
 */
walkHtmlTokens.decodeHtmlEntities = (str, isAttribute) => {
	// Fast path: nothing that could be a reference.
	if (!str.includes("&")) return str;

	// Match one of three forms (each with an optional trailing `;`):
	//   `&#x<hex>` - hex numeric reference (requires the `x`/`X`).
	//   `&#<dec>`  - decimal numeric reference (digits only).
	//   `&<name>`  - named reference (letter followed by alphanumerics).
	// The three alternatives are kept separate so a decimal reference like
	// `&#65b` doesn't greedily eat the trailing `b` as if it were hex.
	return str.replace(
		/&(?:#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z][a-zA-Z0-9]*);?/g,
		(match, offset, source) => {
			// Numeric reference: `&#65;` or `&#x41;`
			if (match.charCodeAt(1) === 0x23 /* # */) {
				const lastChar = match.charAt(match.length - 1);
				const isHex =
					match.charCodeAt(2) === 0x78 || match.charCodeAt(2) === 0x58;
				// Strip the `&#` / `&#x` prefix and the optional `;` suffix.
				const body = isHex
					? lastChar === ";"
						? match.slice(3, -1)
						: match.slice(3)
					: lastChar === ";"
						? match.slice(2, -1)
						: match.slice(2);
				// The regex above guarantees at least one digit in `body`,
				// so `parseInt` never yields NaN here. Very long digit runs
				// may yield Infinity, which is treated as out of range.
				return decodeNumericReference(Number.parseInt(body, isHex ? 16 : 10));
			}

			// Named reference. Try the full captured name first, then
			// progressively shorter prefixes - this handles direct matches
			// like `&amp;` as well as WHATWG longest-prefix semantics where
			// e.g. `&notpre;` decodes as `¬` (a legacy bare entity)
			// followed by `pre;` as literal text.
			const name = match.slice(1);
			const matchEndsWithSemi = name.charCodeAt(name.length - 1) === 0x3b;

			// Attribute-context guard: if the entity match didn't end with `;`
			// and the next character in the source is `=` or ASCII
			// alphanumeric, the WHATWG spec says to flush the literal text
			// rather than decode. The greedy regex already absorbed any
			// trailing alphanumerics, so the only candidate "next char" that
			// still needs checking here is `=`.
			if (isAttribute && !matchEndsWithSemi) {
				const after = source.charCodeAt(offset + match.length);
				if (after === 0x3d /* = */) return match;
			}

			// Cap the longest-prefix search at MAX_ENTITY_NAME_LEN so pathological
			// inputs like `&` + thousands of alphanumerics stay linear-time.
			// Anything past that cap can't possibly match and is appended
			// verbatim as part of `name.slice(i)`.
			const searchLen =
				name.length > MAX_ENTITY_NAME_LEN ? MAX_ENTITY_NAME_LEN : name.length;
			for (let i = searchLen; i > 0; i--) {
				const prefix = name.slice(0, i);
				// Only own entries count as entities. A bare property read
				// would also see inherited members such as `constructor` or
				// `toString` when the table is an ordinary object, turning
				// `&toString` into the text of a native function.
				if (!Object.prototype.hasOwnProperty.call(HTML_ENTITIES, prefix)) {
					continue;
				}
				const decoded = HTML_ENTITIES[prefix];
				if (decoded === undefined) continue;

				// Attribute-context longest-prefix guard: if the matched
				// prefix doesn't end with `;` and something from `name` is
				// left over, leave the whole match literal per WHATWG. (The
				// regex greedy-consumes alphanumerics, so the leftover starts
				// with an alphanumeric whenever the prefix is a real legacy
				// entity; the `=` case is handled above against the source
				// character after the match.)
				if (
					isAttribute &&
					i < name.length &&
					prefix.charCodeAt(prefix.length - 1) !== 0x3b
				) {
					return match;
				}
				return decoded + name.slice(i);
			}

			// No known entity is a prefix of `name`: keep the text as written.
			return match;
		}
	);
};
|
|
3248
|
+
|
|
3249
|
+
module.exports = walkHtmlTokens;
|