webpack 5.106.1 → 5.107.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/lib/APIPlugin.js +4 -2
- package/lib/AsyncDependenciesBlock.js +3 -0
- package/lib/AutomaticPrefetchPlugin.js +6 -1
- package/lib/BannerPlugin.js +10 -1
- package/lib/Cache.js +27 -5
- package/lib/CacheFacade.js +26 -0
- package/lib/Chunk.js +46 -0
- package/lib/ChunkGraph.js +111 -0
- package/lib/ChunkGroup.js +57 -13
- package/lib/ChunkTemplate.js +9 -0
- package/lib/CleanPlugin.js +14 -1
- package/lib/CodeGenerationResults.js +19 -0
- package/lib/CompatibilityPlugin.js +23 -8
- package/lib/Compilation.js +161 -27
- package/lib/Compiler.js +41 -17
- package/lib/ConcatenationScope.js +24 -1
- package/lib/ConditionalInitFragment.js +6 -0
- package/lib/ConstPlugin.js +4 -1
- package/lib/ContextExclusionPlugin.js +2 -1
- package/lib/ContextModule.js +14 -2
- package/lib/ContextModuleFactory.js +10 -0
- package/lib/ContextReplacementPlugin.js +4 -1
- package/lib/DefinePlugin.js +22 -9
- package/lib/DependenciesBlock.js +6 -1
- package/lib/Dependency.js +48 -4
- package/lib/DependencyTemplate.js +7 -1
- package/lib/DependencyTemplates.js +4 -0
- package/lib/DotenvPlugin.js +3 -0
- package/lib/DynamicEntryPlugin.js +3 -1
- package/lib/EntryOptionPlugin.js +3 -0
- package/lib/EntryPlugin.js +2 -1
- package/lib/Entrypoint.js +4 -0
- package/lib/EnvironmentPlugin.js +3 -2
- package/lib/ErrorHelpers.js +7 -0
- package/lib/EvalDevToolModulePlugin.js +3 -1
- package/lib/EvalSourceMapDevToolPlugin.js +11 -10
- package/lib/ExportsInfo.js +56 -2
- package/lib/ExportsInfoApiPlugin.js +2 -1
- package/lib/ExternalModule.js +114 -15
- package/lib/ExternalModuleFactoryPlugin.js +15 -0
- package/lib/ExternalsPlugin.js +2 -1
- package/lib/FileSystemInfo.js +291 -71
- package/lib/FlagAllModulesAsUsedPlugin.js +2 -1
- package/lib/FlagDependencyExportsPlugin.js +4 -1
- package/lib/FlagDependencyUsagePlugin.js +5 -1
- package/lib/FlagEntryExportAsUsedPlugin.js +2 -1
- package/lib/Generator.js +16 -3
- package/lib/HotModuleReplacementPlugin.js +35 -9
- package/lib/IgnorePlugin.js +4 -2
- package/lib/IgnoreWarningsPlugin.js +2 -1
- package/lib/InitFragment.js +10 -0
- package/lib/JavascriptMetaInfoPlugin.js +2 -1
- package/lib/LibraryTemplatePlugin.js +2 -1
- package/lib/LoaderOptionsPlugin.js +3 -1
- package/lib/LoaderTargetPlugin.js +2 -1
- package/lib/MainTemplate.js +15 -0
- package/lib/ManifestPlugin.js +9 -2
- package/lib/Module.js +101 -19
- package/lib/ModuleFactory.js +6 -1
- package/lib/ModuleFilenameHelpers.js +3 -0
- package/lib/ModuleGraph.js +66 -0
- package/lib/ModuleGraphConnection.js +9 -0
- package/lib/ModuleInfoHeaderPlugin.js +5 -0
- package/lib/ModuleSourceTypeConstants.js +32 -1
- package/lib/ModuleTemplate.js +8 -0
- package/lib/ModuleTypeConstants.js +12 -3
- package/lib/MultiCompiler.js +30 -2
- package/lib/MultiStats.js +8 -0
- package/lib/MultiWatching.js +3 -0
- package/lib/NoEmitOnErrorsPlugin.js +1 -1
- package/lib/NodeStuffPlugin.js +11 -2
- package/lib/NormalModule.js +43 -33
- package/lib/NormalModuleFactory.js +35 -2
- package/lib/NormalModuleReplacementPlugin.js +1 -1
- package/lib/NullFactory.js +1 -0
- package/lib/OptionsApply.js +1 -0
- package/lib/Parser.js +3 -1
- package/lib/PlatformPlugin.js +2 -1
- package/lib/PrefetchPlugin.js +2 -1
- package/lib/ProgressPlugin.js +158 -68
- package/lib/ProvidePlugin.js +3 -1
- package/lib/RawModule.js +12 -0
- package/lib/RecordIdsPlugin.js +8 -0
- package/lib/RequestShortener.js +8 -0
- package/lib/ResolverFactory.js +5 -0
- package/lib/RuntimeGlobals.js +6 -5
- package/lib/RuntimeModule.js +22 -7
- package/lib/RuntimePlugin.js +13 -0
- package/lib/RuntimeTemplate.js +35 -1
- package/lib/SelfModuleFactory.js +2 -0
- package/lib/SourceMapDevToolModuleOptionsPlugin.js +2 -0
- package/lib/SourceMapDevToolPlugin.js +3 -1
- package/lib/Stats.js +5 -0
- package/lib/Template.js +20 -0
- package/lib/TemplatedPathPlugin.js +10 -3
- package/lib/UseStrictPlugin.js +2 -1
- package/lib/WarnCaseSensitiveModulesPlugin.js +71 -3
- package/lib/WarnDeprecatedOptionPlugin.js +2 -2
- package/lib/WarnNoModeSetPlugin.js +17 -2
- package/lib/WatchIgnorePlugin.js +4 -1
- package/lib/Watching.js +16 -3
- package/lib/WebpackError.js +3 -74
- package/lib/WebpackIsIncludedPlugin.js +3 -1
- package/lib/WebpackOptionsApply.js +14 -1
- package/lib/WebpackOptionsDefaulter.js +1 -0
- package/lib/asset/AssetBytesGenerator.js +12 -2
- package/lib/asset/AssetBytesParser.js +1 -0
- package/lib/asset/AssetGenerator.js +42 -10
- package/lib/asset/AssetModulesPlugin.js +7 -2
- package/lib/asset/AssetParser.js +2 -0
- package/lib/asset/AssetSourceGenerator.js +12 -2
- package/lib/asset/AssetSourceParser.js +1 -0
- package/lib/asset/RawDataUrlModule.js +11 -0
- package/lib/async-modules/AsyncModuleHelpers.js +1 -0
- package/lib/async-modules/AwaitDependenciesInitFragment.js +4 -0
- package/lib/async-modules/InferAsyncModulesPlugin.js +1 -1
- package/lib/buildChunkGraph.js +19 -6
- package/lib/cache/AddBuildDependenciesPlugin.js +2 -1
- package/lib/cache/AddManagedPathsPlugin.js +2 -1
- package/lib/cache/IdleFileCachePlugin.js +2 -1
- package/lib/cache/MemoryCachePlugin.js +1 -1
- package/lib/cache/MemoryWithGcCachePlugin.js +3 -1
- package/lib/cache/PackFileCacheStrategy.js +33 -4
- package/lib/cache/ResolverCachePlugin.js +10 -1
- package/lib/cache/getLazyHashedEtag.js +4 -0
- package/lib/cache/mergeEtags.js +2 -0
- package/lib/cli.js +33 -1
- package/lib/config/browserslistTargetHandler.js +2 -0
- package/lib/config/defaults.js +232 -12
- package/lib/config/normalization.js +15 -1
- package/lib/config/target.js +11 -0
- package/lib/container/ContainerEntryDependency.js +2 -0
- package/lib/container/ContainerEntryModule.js +12 -0
- package/lib/container/ContainerEntryModuleFactory.js +1 -0
- package/lib/container/ContainerExposedDependency.js +4 -0
- package/lib/container/ContainerPlugin.js +2 -1
- package/lib/container/ContainerReferencePlugin.js +2 -1
- package/lib/container/FallbackDependency.js +4 -0
- package/lib/container/FallbackItemDependency.js +1 -0
- package/lib/container/FallbackModule.js +12 -0
- package/lib/container/FallbackModuleFactory.js +1 -0
- package/lib/container/ModuleFederationPlugin.js +3 -1
- package/lib/container/RemoteModule.js +13 -0
- package/lib/container/RemoteRuntimeModule.js +1 -0
- package/lib/container/RemoteToExternalDependency.js +1 -0
- package/lib/container/options.js +7 -0
- package/lib/css/CssGenerator.js +336 -100
- package/lib/css/CssInjectStyleRuntimeModule.js +45 -42
- package/lib/css/CssLoadingRuntimeModule.js +23 -4
- package/lib/{CssModule.js → css/CssModule.js} +21 -15
- package/lib/css/CssModulesPlugin.js +235 -89
- package/lib/css/CssParser.js +616 -261
- package/lib/css/walkCssTokens.js +179 -2
- package/lib/debug/ProfilingPlugin.js +19 -1
- package/lib/dependencies/AMDDefineDependency.js +7 -0
- package/lib/dependencies/AMDDefineDependencyParserPlugin.js +12 -0
- package/lib/dependencies/AMDPlugin.js +4 -1
- package/lib/dependencies/AMDRequireArrayDependency.js +6 -0
- package/lib/dependencies/AMDRequireContextDependency.js +3 -0
- package/lib/dependencies/AMDRequireDependenciesBlock.js +1 -0
- package/lib/dependencies/AMDRequireDependenciesBlockParserPlugin.js +14 -1
- package/lib/dependencies/AMDRequireDependency.js +4 -0
- package/lib/dependencies/AMDRequireItemDependency.js +1 -0
- package/lib/dependencies/AMDRuntimeModules.js +3 -0
- package/lib/dependencies/CachedConstDependency.js +6 -1
- package/lib/dependencies/CommonJsDependencyHelpers.js +64 -0
- package/lib/dependencies/CommonJsExportRequireDependency.js +62 -10
- package/lib/dependencies/CommonJsExportsDependency.js +4 -0
- package/lib/dependencies/CommonJsExportsParserPlugin.js +9 -1
- package/lib/dependencies/CommonJsFullRequireDependency.js +36 -9
- package/lib/dependencies/CommonJsImportsParserPlugin.js +24 -3
- package/lib/dependencies/CommonJsPlugin.js +4 -1
- package/lib/dependencies/CommonJsRequireContextDependency.js +3 -0
- package/lib/dependencies/CommonJsRequireDependency.js +70 -4
- package/lib/dependencies/CommonJsSelfReferenceDependency.js +5 -0
- package/lib/dependencies/ConstDependency.js +6 -1
- package/lib/dependencies/ContextDependency.js +9 -2
- package/lib/dependencies/ContextDependencyHelpers.js +4 -1
- package/lib/dependencies/ContextDependencyTemplateAsId.js +1 -0
- package/lib/dependencies/ContextDependencyTemplateAsRequireCall.js +1 -0
- package/lib/dependencies/ContextElementDependency.js +4 -0
- package/lib/dependencies/CreateRequireParserPlugin.js +7 -1
- package/lib/dependencies/CreateScriptUrlDependency.js +4 -0
- package/lib/dependencies/CriticalDependencyWarning.js +2 -1
- package/lib/dependencies/CssIcssExportDependency.js +339 -66
- package/lib/dependencies/CssIcssImportDependency.js +53 -8
- package/lib/dependencies/CssIcssSymbolDependency.js +16 -4
- package/lib/dependencies/CssImportDependency.js +12 -0
- package/lib/dependencies/CssUrlDependency.js +32 -0
- package/lib/dependencies/DelegatedSourceDependency.js +1 -0
- package/lib/dependencies/DllEntryDependency.js +3 -0
- package/lib/dependencies/DynamicExports.js +5 -0
- package/lib/dependencies/EntryDependency.js +1 -0
- package/lib/dependencies/ExportsInfoDependency.js +6 -0
- package/lib/dependencies/ExternalModuleDependency.js +5 -0
- package/lib/dependencies/ExternalModuleInitFragment.js +3 -0
- package/lib/dependencies/ExternalModuleInitFragmentDependency.js +4 -0
- package/lib/dependencies/HarmonyAcceptDependency.js +6 -0
- package/lib/dependencies/HarmonyAcceptImportDependency.js +1 -0
- package/lib/dependencies/HarmonyCompatibilityDependency.js +1 -0
- package/lib/dependencies/HarmonyDetectionParserPlugin.js +4 -1
- package/lib/dependencies/HarmonyEvaluatedImportSpecifierDependency.js +4 -0
- package/lib/dependencies/HarmonyExportDependencyParserPlugin.js +10 -7
- package/lib/dependencies/HarmonyExportExpressionDependency.js +27 -14
- package/lib/dependencies/HarmonyExportHeaderDependency.js +4 -0
- package/lib/dependencies/HarmonyExportImportedSpecifierDependency.js +143 -5
- package/lib/dependencies/HarmonyExportInitFragment.js +6 -0
- package/lib/dependencies/HarmonyExportSpecifierDependency.js +5 -0
- package/lib/dependencies/HarmonyExports.js +2 -0
- package/lib/dependencies/HarmonyImportDependency.js +21 -2
- package/lib/dependencies/HarmonyImportDependencyParserPlugin.js +31 -1
- package/lib/dependencies/HarmonyImportSideEffectDependency.js +4 -0
- package/lib/dependencies/HarmonyImportSpecifierDependency.js +18 -3
- package/lib/{HarmonyLinkingError.js → dependencies/HarmonyLinkingError.js} +5 -3
- package/lib/dependencies/HarmonyModulesPlugin.js +4 -1
- package/lib/dependencies/HarmonyTopLevelThisParserPlugin.js +1 -0
- package/lib/dependencies/HtmlInlineScriptDependency.js +133 -0
- package/lib/dependencies/HtmlInlineStyleDependency.js +101 -0
- package/lib/dependencies/HtmlScriptSrcDependency.js +318 -0
- package/lib/dependencies/HtmlSourceDependency.js +127 -0
- package/lib/dependencies/ImportContextDependency.js +4 -0
- package/lib/dependencies/ImportDependency.js +5 -0
- package/lib/dependencies/ImportEagerDependency.js +2 -0
- package/lib/dependencies/ImportMetaContextDependency.js +1 -0
- package/lib/dependencies/ImportMetaContextDependencyParserPlugin.js +4 -1
- package/lib/dependencies/ImportMetaContextPlugin.js +2 -1
- package/lib/dependencies/ImportMetaHotAcceptDependency.js +1 -0
- package/lib/dependencies/ImportMetaHotDeclineDependency.js +1 -0
- package/lib/dependencies/ImportMetaPlugin.js +6 -0
- package/lib/dependencies/ImportParserPlugin.js +9 -2
- package/lib/dependencies/ImportPhase.js +7 -3
- package/lib/dependencies/ImportPlugin.js +2 -1
- package/lib/dependencies/ImportWeakDependency.js +2 -0
- package/lib/dependencies/JsonExportsDependency.js +6 -1
- package/lib/dependencies/LoaderDependency.js +2 -0
- package/lib/dependencies/LoaderImportDependency.js +2 -0
- package/lib/dependencies/LoaderPlugin.js +4 -1
- package/lib/dependencies/LocalModule.js +4 -0
- package/lib/dependencies/LocalModuleDependency.js +4 -0
- package/lib/dependencies/LocalModulesHelpers.js +3 -0
- package/lib/dependencies/ModuleDecoratorDependency.js +7 -1
- package/lib/dependencies/ModuleDependency.js +7 -0
- package/lib/dependencies/ModuleDependencyTemplateAsId.js +1 -0
- package/lib/dependencies/ModuleDependencyTemplateAsRequireId.js +1 -0
- package/lib/dependencies/ModuleHotAcceptDependency.js +1 -0
- package/lib/dependencies/ModuleHotDeclineDependency.js +1 -0
- package/lib/dependencies/ModuleInitFragmentDependency.js +4 -0
- package/lib/dependencies/NullDependency.js +2 -0
- package/lib/dependencies/PrefetchDependency.js +1 -0
- package/lib/dependencies/ProvidedDependency.js +6 -1
- package/lib/dependencies/PureExpressionDependency.js +7 -1
- package/lib/dependencies/RequireContextDependency.js +1 -0
- package/lib/dependencies/RequireContextDependencyParserPlugin.js +1 -0
- package/lib/dependencies/RequireContextPlugin.js +2 -1
- package/lib/dependencies/RequireEnsureDependenciesBlock.js +1 -0
- package/lib/dependencies/RequireEnsureDependenciesBlockParserPlugin.js +1 -0
- package/lib/dependencies/RequireEnsureDependency.js +4 -0
- package/lib/dependencies/RequireEnsureItemDependency.js +1 -0
- package/lib/dependencies/RequireEnsurePlugin.js +2 -1
- package/lib/dependencies/RequireHeaderDependency.js +4 -0
- package/lib/dependencies/RequireIncludeDependency.js +2 -0
- package/lib/dependencies/RequireIncludeDependencyParserPlugin.js +4 -1
- package/lib/dependencies/RequireIncludePlugin.js +2 -1
- package/lib/{RequireJsStuffPlugin.js → dependencies/RequireJsStuffPlugin.js} +9 -8
- package/lib/dependencies/RequireResolveContextDependency.js +3 -0
- package/lib/dependencies/RequireResolveDependency.js +1 -0
- package/lib/dependencies/RequireResolveHeaderDependency.js +5 -0
- package/lib/dependencies/RuntimeRequirementsDependency.js +5 -1
- package/lib/dependencies/StaticExportsDependency.js +3 -0
- package/lib/dependencies/SystemPlugin.js +5 -2
- package/lib/dependencies/SystemRuntimeModule.js +1 -0
- package/lib/dependencies/URLContextDependency.js +3 -0
- package/lib/dependencies/URLDependency.js +6 -0
- package/lib/dependencies/URLPlugin.js +2 -0
- package/lib/dependencies/UnsupportedDependency.js +4 -0
- package/lib/dependencies/WebAssemblyExportImportedDependency.js +4 -0
- package/lib/dependencies/WebAssemblyImportDependency.js +5 -2
- package/lib/dependencies/WebpackIsIncludedDependency.js +2 -0
- package/lib/dependencies/WorkerDependency.js +6 -1
- package/lib/dependencies/WorkerPlugin.js +11 -3
- package/lib/dependencies/getFunctionExpression.js +1 -0
- package/lib/dependencies/processExportInfo.js +1 -0
- package/lib/{DelegatedModule.js → dll/DelegatedModule.js} +44 -31
- package/lib/{DelegatedModuleFactoryPlugin.js → dll/DelegatedModuleFactoryPlugin.js} +7 -4
- package/lib/{DelegatedPlugin.js → dll/DelegatedPlugin.js} +4 -3
- package/lib/{DllEntryPlugin.js → dll/DllEntryPlugin.js} +6 -5
- package/lib/{DllModule.js → dll/DllModule.js} +35 -24
- package/lib/{DllModuleFactory.js → dll/DllModuleFactory.js} +5 -4
- package/lib/{DllPlugin.js → dll/DllPlugin.js} +8 -6
- package/lib/{DllReferencePlugin.js → dll/DllReferencePlugin.js} +23 -18
- package/lib/{LibManifestPlugin.js → dll/LibManifestPlugin.js} +13 -10
- package/lib/electron/ElectronTargetPlugin.js +2 -1
- package/lib/{AbstractMethodError.js → errors/AbstractMethodError.js} +10 -1
- package/lib/{AsyncDependencyToInitialChunkError.js → errors/AsyncDependencyToInitialChunkError.js} +8 -3
- package/lib/errors/BuildCycleError.js +1 -1
- package/lib/{ChunkRenderError.js → errors/ChunkRenderError.js} +1 -1
- package/lib/{CodeGenerationError.js → errors/CodeGenerationError.js} +1 -1
- package/lib/{CommentCompilationWarning.js → errors/CommentCompilationWarning.js} +9 -3
- package/lib/{ConcurrentCompilationError.js → errors/ConcurrentCompilationError.js} +4 -2
- package/lib/{EnvironmentNotSupportAsyncWarning.js → errors/EnvironmentNotSupportAsyncWarning.js} +4 -4
- package/lib/{HookWebpackError.js → errors/HookWebpackError.js} +11 -5
- package/lib/{IgnoreErrorModuleFactory.js → errors/IgnoreErrorModuleFactory.js} +6 -4
- package/lib/{InvalidDependenciesModuleWarning.js → errors/InvalidDependenciesModuleWarning.js} +4 -3
- package/lib/errors/JSONParseError.js +114 -0
- package/lib/{ModuleBuildError.js → errors/ModuleBuildError.js} +8 -5
- package/lib/{ModuleDependencyError.js → errors/ModuleDependencyError.js} +2 -2
- package/lib/{ModuleDependencyWarning.js → errors/ModuleDependencyWarning.js} +5 -4
- package/lib/{ModuleError.js → errors/ModuleError.js} +7 -5
- package/lib/{ModuleHashingError.js → errors/ModuleHashingError.js} +1 -1
- package/lib/{ModuleNotFoundError.js → errors/ModuleNotFoundError.js} +3 -2
- package/lib/{ModuleParseError.js → errors/ModuleParseError.js} +11 -6
- package/lib/{ModuleRestoreError.js → errors/ModuleRestoreError.js} +2 -1
- package/lib/{ModuleStoreError.js → errors/ModuleStoreError.js} +2 -1
- package/lib/{ModuleWarning.js → errors/ModuleWarning.js} +8 -5
- package/lib/{NodeStuffInWebError.js → errors/NodeStuffInWebError.js} +5 -4
- package/lib/errors/NonErrorEmittedError.js +28 -0
- package/lib/{UnhandledSchemeError.js → errors/UnhandledSchemeError.js} +8 -2
- package/lib/{UnsupportedFeatureWarning.js → errors/UnsupportedFeatureWarning.js} +4 -3
- package/lib/errors/WebpackError.js +84 -0
- package/lib/esm/ExportWebpackRequireRuntimeModule.js +2 -0
- package/lib/esm/ModuleChunkFormatPlugin.js +5 -1
- package/lib/esm/ModuleChunkLoadingPlugin.js +3 -1
- package/lib/esm/ModuleChunkLoadingRuntimeModule.js +5 -0
- package/lib/hmr/HotModuleReplacementRuntimeModule.js +1 -0
- package/lib/hmr/JavascriptHotModuleReplacementHelper.js +1 -0
- package/lib/hmr/LazyCompilationPlugin.js +20 -1
- package/lib/hmr/lazyCompilationBackend.js +2 -0
- package/lib/html/HtmlGenerator.js +379 -0
- package/lib/html/HtmlModulesPlugin.js +433 -0
- package/lib/html/HtmlParser.js +1489 -0
- package/lib/html/walkHtmlTokens.js +2733 -0
- package/lib/ids/ChunkModuleIdRangePlugin.js +3 -1
- package/lib/ids/DeterministicChunkIdsPlugin.js +3 -1
- package/lib/ids/DeterministicModuleIdsPlugin.js +3 -1
- package/lib/ids/HashedModuleIdsPlugin.js +2 -1
- package/lib/ids/IdHelpers.js +22 -1
- package/lib/ids/NamedChunkIdsPlugin.js +3 -1
- package/lib/ids/NamedModuleIdsPlugin.js +3 -1
- package/lib/ids/NaturalChunkIdsPlugin.js +1 -1
- package/lib/ids/NaturalModuleIdsPlugin.js +1 -1
- package/lib/ids/OccurrenceChunkIdsPlugin.js +2 -1
- package/lib/ids/OccurrenceModuleIdsPlugin.js +4 -1
- package/lib/ids/SyncModuleIdsPlugin.js +3 -1
- package/lib/index.js +39 -15
- package/lib/javascript/ArrayPushCallbackChunkFormatPlugin.js +1 -1
- package/lib/javascript/BasicEvaluatedExpression.js +4 -2
- package/lib/javascript/ChunkFormatHelpers.js +2 -1
- package/lib/javascript/ChunkHelpers.js +1 -0
- package/lib/javascript/CommonJsChunkFormatPlugin.js +1 -1
- package/lib/javascript/EnableChunkLoadingPlugin.js +5 -1
- package/lib/javascript/JavascriptGenerator.js +10 -0
- package/lib/javascript/JavascriptModulesPlugin.js +112 -9
- package/lib/javascript/JavascriptParser.js +360 -16
- package/lib/javascript/JavascriptParserHelpers.js +7 -1
- package/lib/javascript/StartupHelpers.js +5 -0
- package/lib/json/JsonData.js +5 -0
- package/lib/json/JsonGenerator.js +21 -0
- package/lib/json/JsonModulesPlugin.js +1 -1
- package/lib/json/JsonParser.js +14 -25
- package/lib/library/AbstractLibraryPlugin.js +17 -2
- package/lib/library/AmdLibraryPlugin.js +8 -0
- package/lib/library/AssignLibraryPlugin.js +16 -0
- package/lib/library/EnableLibraryPlugin.js +8 -2
- package/lib/library/ExportPropertyLibraryPlugin.js +9 -0
- package/lib/{FalseIIFEUmdWarning.js → library/FalseIIFEUmdWarning.js} +1 -1
- package/lib/library/JsonpLibraryPlugin.js +8 -0
- package/lib/library/ModuleLibraryPlugin.js +86 -1
- package/lib/library/SystemLibraryPlugin.js +8 -0
- package/lib/library/UmdLibraryPlugin.js +16 -0
- package/lib/logging/Logger.js +17 -0
- package/lib/logging/createConsoleLogger.js +7 -0
- package/lib/logging/runtime.js +2 -0
- package/lib/logging/truncateArgs.js +2 -0
- package/lib/node/CommonJsChunkLoadingPlugin.js +5 -1
- package/lib/node/NodeEnvironmentPlugin.js +7 -3
- package/lib/node/NodeSourcePlugin.js +1 -1
- package/lib/node/NodeTargetPlugin.js +2 -1
- package/lib/node/NodeTemplatePlugin.js +3 -1
- package/lib/node/NodeWatchFileSystem.js +2 -0
- package/lib/node/ReadFileChunkLoadingRuntimeModule.js +3 -0
- package/lib/node/ReadFileCompileAsyncWasmPlugin.js +4 -1
- package/lib/node/ReadFileCompileWasmPlugin.js +4 -1
- package/lib/node/RequireChunkLoadingRuntimeModule.js +3 -0
- package/lib/node/nodeConsole.js +116 -64
- package/lib/optimize/AggressiveMergingPlugin.js +3 -1
- package/lib/optimize/AggressiveSplittingPlugin.js +6 -1
- package/lib/optimize/ConcatenatedModule.js +62 -6
- package/lib/optimize/EnsureChunkConditionsPlugin.js +2 -1
- package/lib/optimize/FlagIncludedChunksPlugin.js +2 -1
- package/lib/optimize/InnerGraph.js +16 -1
- package/lib/optimize/InnerGraphPlugin.js +14 -2
- package/lib/optimize/LimitChunkCountPlugin.js +9 -0
- package/lib/optimize/MangleExportsPlugin.js +5 -1
- package/lib/optimize/MergeDuplicateChunksPlugin.js +2 -0
- package/lib/optimize/MinChunkSizePlugin.js +2 -1
- package/lib/optimize/MinMaxSizeWarning.js +5 -4
- package/lib/optimize/ModuleConcatenationPlugin.js +36 -8
- package/lib/optimize/RealContentHashPlugin.js +104 -26
- package/lib/optimize/RemoveEmptyChunksPlugin.js +2 -1
- package/lib/optimize/RemoveParentModulesPlugin.js +3 -1
- package/lib/optimize/RuntimeChunkPlugin.js +2 -1
- package/lib/optimize/SideEffectsFlagPlugin.js +118 -4
- package/lib/optimize/SplitChunksPlugin.js +50 -2
- package/lib/performance/AssetsOverSizeLimitWarning.js +3 -2
- package/lib/performance/EntrypointsOverSizeLimitWarning.js +3 -2
- package/lib/performance/NoAsyncChunksWarning.js +5 -3
- package/lib/performance/SizeLimitsPlugin.js +8 -2
- package/lib/prefetch/ChunkPrefetchFunctionRuntimeModule.js +1 -0
- package/lib/prefetch/ChunkPrefetchPreloadPlugin.js +6 -0
- package/lib/prefetch/ChunkPrefetchStartupRuntimeModule.js +1 -0
- package/lib/prefetch/ChunkPrefetchTriggerRuntimeModule.js +5 -1
- package/lib/prefetch/ChunkPreloadTriggerRuntimeModule.js +1 -0
- package/lib/rules/BasicEffectRulePlugin.js +3 -0
- package/lib/rules/BasicMatcherRulePlugin.js +3 -0
- package/lib/rules/ObjectMatcherRulePlugin.js +3 -0
- package/lib/rules/RuleSetCompiler.js +18 -0
- package/lib/rules/UseEffectRulePlugin.js +10 -3
- package/lib/runtime/AsyncModuleRuntimeModule.js +1 -0
- package/lib/runtime/AutoPublicPathRuntimeModule.js +1 -0
- package/lib/runtime/BaseUriRuntimeModule.js +1 -0
- package/lib/runtime/ChunkNameRuntimeModule.js +1 -0
- package/lib/runtime/CompatGetDefaultExportRuntimeModule.js +1 -0
- package/lib/runtime/CompatRuntimeModule.js +2 -0
- package/lib/runtime/CreateFakeNamespaceObjectRuntimeModule.js +1 -0
- package/lib/runtime/CreateScriptRuntimeModule.js +1 -0
- package/lib/runtime/CreateScriptUrlRuntimeModule.js +1 -0
- package/lib/runtime/DefinePropertyGettersRuntimeModule.js +1 -0
- package/lib/runtime/EnsureChunkRuntimeModule.js +1 -0
- package/lib/runtime/GetChunkFilenameRuntimeModule.js +1 -0
- package/lib/runtime/GetFullHashRuntimeModule.js +1 -0
- package/lib/runtime/GetMainFilenameRuntimeModule.js +1 -0
- package/lib/runtime/GetTrustedTypesPolicyRuntimeModule.js +1 -0
- package/lib/runtime/GlobalRuntimeModule.js +1 -0
- package/lib/runtime/HasOwnPropertyRuntimeModule.js +1 -0
- package/lib/runtime/HelperRuntimeModule.js +5 -0
- package/lib/runtime/LoadScriptRuntimeModule.js +1 -0
- package/lib/runtime/MakeDeferredNamespaceObjectRuntime.js +121 -13
- package/lib/runtime/MakeNamespaceObjectRuntimeModule.js +1 -0
- package/lib/runtime/NonceRuntimeModule.js +1 -0
- package/lib/runtime/OnChunksLoadedRuntimeModule.js +1 -0
- package/lib/runtime/PublicPathRuntimeModule.js +1 -0
- package/lib/runtime/RelativeUrlRuntimeModule.js +1 -0
- package/lib/runtime/RuntimeIdRuntimeModule.js +1 -0
- package/lib/runtime/SetAnonymousDefaultNameRuntimeModule.js +35 -0
- package/lib/runtime/StartupChunkDependenciesPlugin.js +13 -1
- package/lib/runtime/StartupChunkDependenciesRuntimeModule.js +1 -0
- package/lib/runtime/StartupEntrypointRuntimeModule.js +1 -0
- package/lib/runtime/SystemContextRuntimeModule.js +1 -0
- package/lib/runtime/ToBinaryRuntimeModule.js +1 -0
- package/lib/schemes/DataUriPlugin.js +14 -2
- package/lib/schemes/FileUriPlugin.js +1 -1
- package/lib/schemes/HttpUriPlugin.js +43 -1
- package/lib/schemes/VirtualUrlPlugin.js +7 -2
- package/lib/serialization/AggregateErrorSerializer.js +2 -0
- package/lib/serialization/ArraySerializer.js +2 -0
- package/lib/serialization/BinaryMiddleware.js +20 -1
- package/lib/serialization/DateObjectSerializer.js +2 -0
- package/lib/serialization/ErrorObjectSerializer.js +3 -0
- package/lib/serialization/FileMiddleware.js +21 -0
- package/lib/serialization/MapObjectSerializer.js +2 -0
- package/lib/serialization/NullPrototypeObjectSerializer.js +2 -0
- package/lib/serialization/ObjectMiddleware.js +23 -0
- package/lib/serialization/PlainObjectSerializer.js +7 -0
- package/lib/serialization/RegExpObjectSerializer.js +2 -0
- package/lib/serialization/Serializer.js +5 -0
- package/lib/serialization/SerializerMiddleware.js +14 -2
- package/lib/serialization/SetObjectSerializer.js +2 -0
- package/lib/serialization/SingleItemMiddleware.js +3 -0
- package/lib/sharing/ConsumeSharedFallbackDependency.js +1 -0
- package/lib/sharing/ConsumeSharedModule.js +15 -0
- package/lib/sharing/ConsumeSharedPlugin.js +8 -3
- package/lib/sharing/ConsumeSharedRuntimeModule.js +9 -4
- package/lib/sharing/ProvideForSharedDependency.js +1 -0
- package/lib/sharing/ProvideSharedDependency.js +4 -0
- package/lib/sharing/ProvideSharedModule.js +12 -1
- package/lib/sharing/ProvideSharedModuleFactory.js +1 -0
- package/lib/sharing/ProvideSharedPlugin.js +5 -2
- package/lib/sharing/SharePlugin.js +2 -1
- package/lib/sharing/ShareRuntimeModule.js +1 -0
- package/lib/sharing/resolveMatchedConfigs.js +4 -1
- package/lib/sharing/utils.js +8 -0
- package/lib/stats/DefaultStatsFactoryPlugin.js +58 -3
- package/lib/stats/DefaultStatsPresetPlugin.js +12 -2
- package/lib/stats/DefaultStatsPrinterPlugin.js +38 -2
- package/lib/stats/StatsFactory.js +13 -1
- package/lib/stats/StatsPrinter.js +7 -0
- package/lib/typescript/TypeScriptPlugin.js +210 -0
- package/lib/url/URLParserPlugin.js +8 -2
- package/lib/util/AppendOnlyStackedSet.js +15 -0
- package/lib/util/ArrayHelpers.js +1 -0
- package/lib/util/ArrayQueue.js +10 -5
- package/lib/util/AsyncQueue.js +22 -2
- package/lib/util/Hash.js +2 -2
- package/lib/util/IterableHelpers.js +3 -0
- package/lib/util/LazyBucketSortedSet.js +21 -0
- package/lib/util/LazySet.js +39 -0
- package/lib/util/LocConverter.js +53 -0
- package/lib/util/ParallelismFactorCalculator.js +1 -0
- package/lib/util/Queue.js +6 -3
- package/lib/util/Semaphore.js +14 -1
- package/lib/util/SetHelpers.js +3 -0
- package/lib/util/SortableSet.js +7 -1
- package/lib/util/StackedCacheMap.js +20 -3
- package/lib/util/StackedMap.js +45 -0
- package/lib/util/StringXor.js +1 -1
- package/lib/util/TupleQueue.js +7 -3
- package/lib/util/TupleSet.js +13 -0
- package/lib/util/URLAbsoluteSpecifier.js +1 -0
- package/lib/util/WeakTupleMap.js +33 -0
- package/lib/util/binarySearchBounds.js +1 -0
- package/lib/util/cleverMerge.js +19 -2
- package/lib/util/comparators.js +34 -3
- package/lib/util/compileBooleanMatcher.js +9 -0
- package/lib/util/concatenate.js +9 -3
- package/lib/util/conventions.js +46 -1
- package/lib/util/createMappings.js +118 -0
- package/lib/util/dataURL.js +1 -0
- package/lib/util/deprecation.js +19 -0
- package/lib/util/deterministicGrouping.js +20 -0
- package/lib/util/extractSourceMap.js +1 -0
- package/lib/util/extractUrlAndGlobal.js +1 -0
- package/lib/util/findGraphRoots.js +5 -0
- package/lib/{formatLocation.js → util/formatLocation.js} +4 -2
- package/lib/{SizeFormatHelpers.js → util/formatSize.js} +4 -1
- package/lib/util/fs.js +71 -8
- package/lib/util/generateDebugId.js +1 -0
- package/lib/util/hash/BatchedHash.js +1 -0
- package/lib/util/hash/BulkUpdateHash.js +1 -0
- package/lib/util/hash/hash-digest.js +8 -0
- package/lib/util/hash/md4.js +1 -1
- package/lib/util/hash/wasm-hash.js +5 -0
- package/lib/util/hash/xxhash64.js +1 -1
- package/lib/util/identifier.js +67 -0
- package/lib/util/internalSerializables.js +35 -19
- package/lib/util/magicComment.js +10 -6
- package/lib/util/makeSerializable.js +6 -0
- package/lib/util/memoize.js +2 -0
- package/lib/util/mimeTypes.js +176 -0
- package/lib/util/nonNumericOnlyHash.js +1 -0
- package/lib/util/parseJson.js +41 -0
- package/lib/util/processAsyncTree.js +8 -0
- package/lib/util/property.js +1 -0
- package/lib/util/registerExternalSerializer.js +20 -0
- package/lib/util/removeBOM.js +1 -0
- package/lib/util/runtime.js +32 -0
- package/lib/util/semver.js +15 -0
- package/lib/util/serialization.js +2 -0
- package/lib/util/smartGrouping.js +8 -0
- package/lib/util/source.js +23 -0
- package/lib/util/topologicalSort.js +69 -0
- package/lib/validateSchema.js +1 -0
- package/lib/wasm/EnableWasmLoadingPlugin.js +15 -1
- package/lib/wasm-async/AsyncWasmCompileRuntimeModule.js +1 -0
- package/lib/wasm-async/AsyncWasmLoadingRuntimeModule.js +1 -0
- package/lib/wasm-async/AsyncWebAssemblyGenerator.js +6 -0
- package/lib/wasm-async/AsyncWebAssemblyJavascriptGenerator.js +5 -0
- package/lib/wasm-async/AsyncWebAssemblyModulesPlugin.js +12 -3
- package/lib/wasm-async/AsyncWebAssemblyParser.js +2 -1
- package/lib/wasm-async/UniversalCompileAsyncWasmPlugin.js +12 -1
- package/lib/wasm-sync/UnsupportedWebAssemblyFeatureError.js +6 -3
- package/lib/wasm-sync/WasmChunkLoadingRuntimeModule.js +1 -0
- package/lib/wasm-sync/WasmFinalizeExportsPlugin.js +2 -2
- package/lib/wasm-sync/WebAssemblyGenerator.js +26 -1
- package/lib/wasm-sync/WebAssemblyInInitialChunkError.js +7 -3
- package/lib/wasm-sync/WebAssemblyJavascriptGenerator.js +4 -0
- package/lib/wasm-sync/WebAssemblyModulesPlugin.js +10 -1
- package/lib/wasm-sync/WebAssemblyParser.js +2 -0
- package/lib/wasm-sync/WebAssemblyUtils.js +2 -0
- package/lib/web/FetchCompileAsyncWasmPlugin.js +10 -1
- package/lib/web/FetchCompileWasmPlugin.js +13 -1
- package/lib/web/JsonpChunkLoadingPlugin.js +11 -1
- package/lib/web/JsonpChunkLoadingRuntimeModule.js +1 -0
- package/lib/web/JsonpTemplatePlugin.js +2 -1
- package/lib/webpack.js +13 -1
- package/lib/webworker/ImportScriptsChunkLoadingPlugin.js +10 -1
- package/lib/webworker/ImportScriptsChunkLoadingRuntimeModule.js +1 -0
- package/lib/webworker/WebWorkerTemplatePlugin.js +1 -1
- package/package.json +31 -31
- package/schemas/WebpackOptions.check.js +1 -1
- package/schemas/WebpackOptions.json +161 -57
- package/schemas/plugins/{DllPlugin.check.d.ts → HtmlGeneratorOptions.check.d.ts} +1 -1
- package/schemas/plugins/HtmlGeneratorOptions.check.js +6 -0
- package/schemas/plugins/HtmlGeneratorOptions.json +3 -0
- package/schemas/plugins/ProgressPlugin.check.js +1 -1
- package/schemas/plugins/ProgressPlugin.json +22 -0
- package/schemas/plugins/{DllReferencePlugin.check.d.ts → css/CssAutoOrModuleParserOptions.check.d.ts} +1 -1
- package/schemas/plugins/css/CssAutoOrModuleParserOptions.check.js +6 -0
- package/schemas/plugins/css/CssAutoOrModuleParserOptions.json +3 -0
- package/schemas/plugins/dll/DllPlugin.check.d.ts +7 -0
- package/schemas/plugins/dll/DllReferencePlugin.check.d.ts +7 -0
- package/types.d.ts +5325 -353
- package/lib/CaseSensitiveModulesWarning.js +0 -72
- package/lib/GraphHelpers.js +0 -46
- package/lib/NoModeWarning.js +0 -23
- package/lib/css/CssMergeStyleSheetsRuntimeModule.js +0 -56
- /package/schemas/plugins/{DllPlugin.check.js → dll/DllPlugin.check.js} +0 -0
- /package/schemas/plugins/{DllPlugin.json → dll/DllPlugin.json} +0 -0
- /package/schemas/plugins/{DllReferencePlugin.check.js → dll/DllReferencePlugin.check.js} +0 -0
- /package/schemas/plugins/{DllReferencePlugin.json → dll/DllReferencePlugin.json} +0 -0
|
@@ -0,0 +1,2733 @@
|
|
|
1
|
+
/*
|
|
2
|
+
MIT License http://www.opensource.org/licenses/mit-license.php
|
|
3
|
+
Author Raj Aryan (based on SWC parser by Alexander Akait)
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
"use strict";
|
|
7
|
+
|
|
8
|
+
// cspell:ignore apos
|
|
9
|
+
|
|
10
|
+
// Tokenizer state identifiers. The names mirror the tokenization states of the
// WHATWG HTML parsing specification; the numeric values are only required to
// be distinct from each other.

// Data, tag and attribute states
const STATE_DATA = 0;
const STATE_TAG_OPEN = 1;
const STATE_END_TAG_OPEN = 2;
const STATE_TAG_NAME = 3;
const STATE_BEFORE_ATTRIBUTE_NAME = 4;
const STATE_ATTRIBUTE_NAME = 5;
const STATE_AFTER_ATTRIBUTE_NAME = 6;
const STATE_BEFORE_ATTRIBUTE_VALUE = 7;
const STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED = 8;
const STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED = 9;
const STATE_ATTRIBUTE_VALUE_UNQUOTED = 10;
const STATE_AFTER_ATTRIBUTE_VALUE_QUOTED = 11;
const STATE_SELF_CLOSING_START_TAG = 12;

// Markup declaration and comment states
const STATE_MARKUP_DECLARATION_OPEN = 13;
const STATE_COMMENT_START = 14;
const STATE_COMMENT_START_DASH = 15;
const STATE_COMMENT = 16;
const STATE_COMMENT_END_DASH = 17;
const STATE_COMMENT_END = 18;
const STATE_COMMENT_END_BANG = 19;
const STATE_BOGUS_COMMENT = 20;

// "<" inside a comment
const STATE_COMMENT_LESS_THAN_SIGN = 21;
const STATE_COMMENT_LESS_THAN_SIGN_BANG = 22;
const STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH = 23;
const STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH = 24;

// DOCTYPE states
const STATE_DOCTYPE = 25;
const STATE_BEFORE_DOCTYPE_NAME = 26;
const STATE_DOCTYPE_NAME = 27;
const STATE_AFTER_DOCTYPE_NAME = 28;
const STATE_AFTER_DOCTYPE_PUBLIC_KEYWORD = 29;
const STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 30;
const STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 31;
const STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 32;
const STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 33;
const STATE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 34;
const STATE_AFTER_DOCTYPE_SYSTEM_KEYWORD = 35;
const STATE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 36;
const STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 37;
const STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 38;
const STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 39;
const STATE_BOGUS_DOCTYPE = 40;

// CDATA section states
const STATE_CDATA_SECTION = 41;
const STATE_CDATA_SECTION_BRACKET = 42;
const STATE_CDATA_SECTION_END = 43;

// RCDATA states
const STATE_RCDATA = 44;
const STATE_RCDATA_LESS_THAN_SIGN = 45;
const STATE_RCDATA_END_TAG_OPEN = 46;
const STATE_RCDATA_END_TAG_NAME = 47;

// RAWTEXT states
const STATE_RAWTEXT = 48;
const STATE_RAWTEXT_LESS_THAN_SIGN = 49;
const STATE_RAWTEXT_END_TAG_OPEN = 50;
const STATE_RAWTEXT_END_TAG_NAME = 51;

// Script data states
const STATE_SCRIPT_DATA = 52;
const STATE_SCRIPT_DATA_LESS_THAN_SIGN = 53;
const STATE_SCRIPT_DATA_END_TAG_OPEN = 54;
const STATE_SCRIPT_DATA_END_TAG_NAME = 55;
const STATE_SCRIPT_DATA_ESCAPE_START = 56;
const STATE_SCRIPT_DATA_ESCAPE_START_DASH = 57;
const STATE_SCRIPT_DATA_ESCAPED = 58;
const STATE_SCRIPT_DATA_ESCAPED_DASH = 59;
const STATE_SCRIPT_DATA_ESCAPED_DASH_DASH = 60;
const STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 61;
const STATE_SCRIPT_DATA_ESCAPED_END_TAG_OPEN = 62;
const STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME = 63;
const STATE_SCRIPT_DATA_DOUBLE_ESCAPE_START = 64;
const STATE_SCRIPT_DATA_DOUBLE_ESCAPED = 65;
const STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 66;
const STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 67;
const STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 68;
const STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END = 69;

// PLAINTEXT state
const STATE_PLAINTEXT = 70;

// Character reference states
// https://html.spec.whatwg.org/multipage/parsing.html#character-reference-state
const STATE_CHARACTER_REFERENCE = 71;
// https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
const STATE_NAMED_CHARACTER_REFERENCE = 72;
// https://html.spec.whatwg.org/multipage/parsing.html#ambiguous-ampersand-state
const STATE_AMBIGUOUS_AMPERSAND = 73;
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-state
const STATE_NUMERIC_CHARACTER_REFERENCE = 74;
// https://html.spec.whatwg.org/multipage/parsing.html#hexadecimal-character-reference-start-state
const STATE_HEXADECIMAL_CHARACTER_REFERENCE_START = 75;
// https://html.spec.whatwg.org/multipage/parsing.html#decimal-character-reference-start-state
const STATE_DECIMAL_CHARACTER_REFERENCE_START = 76;
// https://html.spec.whatwg.org/multipage/parsing.html#hexadecimal-character-reference-state
const STATE_HEXADECIMAL_CHARACTER_REFERENCE = 77;
// https://html.spec.whatwg.org/multipage/parsing.html#decimal-character-reference-state
const STATE_DECIMAL_CHARACTER_REFERENCE = 78;
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
const STATE_NUMERIC_CHARACTER_REFERENCE_END = 79;
|
|
108
|
+
|
|
109
|
+
// Character codes (UTF-16 code units) the tokenizer compares against.
const CC_TAB = 0x09; // U+0009 CHARACTER TABULATION
const CC_LF = 0x0a; // U+000A LINE FEED
const CC_FF = 0x0c; // U+000C FORM FEED
const CC_SPACE = 0x20; // U+0020 SPACE
const CC_EXCLAMATION_MARK = 0x21; // !
const CC_QUOTATION_MARK = 0x22; // "
const CC_NUMBER_SIGN = 0x23; // #
const CC_AMPERSAND = 0x26; // &
const CC_APOSTROPHE = 0x27; // '
const CC_HYPHEN_MINUS = 0x2d; // -
const CC_SOLIDUS = 0x2f; // /
const CC_SEMICOLON = 0x3b; // ;
const CC_LESS_THAN = 0x3c; // <
const CC_EQUALS = 0x3d; // =
const CC_GREATER_THAN = 0x3e; // >
const CC_QUESTION_MARK = 0x3f; // ?
const CC_LEFT_SQUARE_BRACKET = 0x5b; // [
const CC_RIGHT_SQUARE_BRACKET = 0x5d; // ]
|
|
127
|
+
|
|
128
|
+
// Quote style of an attribute value, passed to the `attribute` callback as
// its `quoteType` argument.
const QUOTE_DOUBLE = 1;
const QUOTE_SINGLE = 2;
const QUOTE_NONE = 0;
|
|
131
|
+
|
|
132
|
+
/**
 * Tests whether a character code is an ASCII letter (`A`-`Z` or `a`-`z`).
 * @param {number} cc character code
 * @returns {boolean} is ascii alpha
 */
const isAsciiAlpha = (cc) =>
	(cc >= 0x41 && cc <= 0x5a) || (cc >= 0x61 && cc <= 0x7a);
|
|
138
|
+
|
|
139
|
+
/**
 * Tests whether a character code is an ASCII letter or an ASCII digit.
 * @param {number} cc character code
 * @returns {boolean} is ascii alphanumeric
 */
const isAsciiAlphanumeric = (cc) => isAsciiAlpha(cc) || isAsciiDigit(cc);
|
|
145
|
+
|
|
146
|
+
/**
 * Tests whether a character code is an ASCII digit (`0`-`9`).
 * @param {number} cc character code
 * @returns {boolean} is ascii digit
 */
const isAsciiDigit = (cc) => cc >= 0x30 && cc <= 0x39;
|
|
151
|
+
|
|
152
|
+
/**
 * Tests whether a character code is an ASCII hexadecimal digit
 * (`0`-`9`, `A`-`F` or `a`-`f`).
 * @param {number} cc character code
 * @returns {boolean} is ascii hex digit
 */
const isAsciiHexDigit = (cc) =>
	(cc >= 0x30 && cc <= 0x39) ||
	(cc >= 0x41 && cc <= 0x46) ||
	(cc >= 0x61 && cc <= 0x66);
|
|
160
|
+
|
|
161
|
+
/**
 * Tests whether a character code is whitespace for the tokenizer: tab, line
 * feed, form feed, carriage return or space.
 *
 * The tokenizer states in the HTML specification list only tab, LF, FF and
 * space because the specification normalizes newlines (CR LF and lone CR
 * become LF) before tokenization. Accepting CR here gives the same result on
 * input that still contains CR LF line endings, without changing any offsets.
 * @param {number} cc character code
 * @returns {boolean} is space
 */
const isSpace = (cc) =>
	cc === CC_TAB ||
	cc === CC_LF ||
	cc === CC_FF ||
	cc === 0x0d /* CR */ ||
	cc === CC_SPACE;
|
|
167
|
+
|
|
168
|
+
/**
 * @typedef {object} HtmlTokenCallbacks
 * @property {(input: string, start: number, end: number, nameStart: number, nameEnd: number, selfClosing: boolean) => number=} openTag
 * @property {(input: string, start: number, end: number, nameStart: number, nameEnd: number) => number=} closeTag
 * @property {(input: string, start: number, end: number) => number=} text
 * @property {(input: string, nameStart: number, nameEnd: number, valueStart: number, valueEnd: number, quoteType: number) => number=} attribute
 * @property {(input: string, start: number, end: number) => number=} comment
 * @property {(input: string, start: number, end: number) => number=} doctype
 */
|
|
177
|
+
|
|
178
|
+
/**
|
|
179
|
+
* @param {string} input input string
|
|
180
|
+
* @param {number} pos current position
|
|
181
|
+
* @param {HtmlTokenCallbacks} callbacks callbacks
|
|
182
|
+
* @returns {number} final position
|
|
183
|
+
*/
|
|
184
|
+
const walkHtmlTokens = (input, pos = 0, callbacks = {}) => {
|
|
185
|
+
const len = input.length;
|
|
186
|
+
let state = STATE_DATA;
|
|
187
|
+
let returnState = STATE_DATA;
|
|
188
|
+
|
|
189
|
+
let textStart = pos;
|
|
190
|
+
let tagStart = pos;
|
|
191
|
+
let tagNameStart = -1;
|
|
192
|
+
let tagNameEnd = -1;
|
|
193
|
+
let attrNameStart = -1;
|
|
194
|
+
let attrNameEnd = -1;
|
|
195
|
+
let attrValueStart = -1;
|
|
196
|
+
let attrQuoteType = QUOTE_NONE;
|
|
197
|
+
let commentStart = pos;
|
|
198
|
+
let lastOpenTagName = "";
|
|
199
|
+
let tempBuffer = "";
|
|
200
|
+
let namedEntityConsumed = 0;
|
|
201
|
+
|
|
202
|
+
/**
 * Tests whether a character code is a lowercase ASCII letter (`a`-`z`).
 * @param {number} cc character code
 * @returns {boolean} is ascii lower alpha
 */
const isAsciiLowerAlpha = (cc) => cc >= 0x61 && cc <= 0x7a;
|
|
207
|
+
|
|
208
|
+
/**
 * Tests whether a character code is an uppercase ASCII letter (`A`-`Z`).
 * @param {number} cc character code
 * @returns {boolean} is ascii upper alpha
 */
const isAsciiUpperAlpha = (cc) => cc >= 0x41 && cc <= 0x5a;
|
|
213
|
+
|
|
214
|
+
/**
 * Maps a start tag name to the tokenizer state used for that element's
 * content. Names without a dedicated content mode map to the data state.
 * The comparison is exact, so the name must already be lower-cased.
 * @param {string} name tag name (lowercase)
 * @returns {number} content mode state for this tag, or STATE_DATA
 */
const getContentModeForTag = (name) => {
	switch (name) {
		case "textarea":
		case "title":
			return STATE_RCDATA;
		case "style":
		case "xmp":
		case "iframe":
		case "noembed":
		case "noframes":
			return STATE_RAWTEXT;
		case "script":
			return STATE_SCRIPT_DATA;
		case "plaintext":
			return STATE_PLAINTEXT;
		default:
			return STATE_DATA;
	}
};
|
|
237
|
+
|
|
238
|
+
/**
 * Reports the pending text run `[textStart, endPos)` through `callbacks.text`.
 * Does nothing when the run is empty or no `text` callback was supplied.
 * The callback's return value is ignored and `textStart` is left unchanged.
 * @param {number} endPos end position
 */
const flushText = (endPos) => {
	if (textStart < endPos && callbacks.text !== undefined) {
		callbacks.text(input, textStart, endPos);
	}
};
|
|
246
|
+
|
|
247
|
+
/**
 * Reports the attribute currently being collected through
 * `callbacks.attribute`, then clears the pending attribute state.
 *
 * The callback is only invoked when it was supplied and an attribute name has
 * been started (`attrNameStart !== -1`). Its return value becomes the result;
 * otherwise `endPos` is returned unchanged.
 * @param {number} endPos end position
 * @returns {number} next position
 */
const emitAttribute = (endPos) => {
	let nextPos = endPos;
	if (callbacks.attribute !== undefined && attrNameStart !== -1) {
		nextPos = callbacks.attribute(
			input,
			attrNameStart,
			attrNameEnd,
			attrValueStart,
			// An attribute without a value reports -1 for both value offsets.
			attrValueStart === -1 ? -1 : endPos,
			attrQuoteType
		);
	}
	// Clear the pending attribute so that a later call does not report it again.
	attrNameStart = -1;
	attrValueStart = -1;
	attrQuoteType = QUOTE_NONE;
	return nextPos;
};
|
|
268
|
+
|
|
269
|
+
/**
 * Reports the current start tag through `callbacks.openTag` and prepares the
 * tokenizer to continue after it.
 *
 * The callback's return value (or `endPos` when no callback was supplied)
 * becomes both the start of the next text run and the returned position.
 * For tags that are not self-closing, the lower-cased tag name is remembered
 * in `lastOpenTagName`.
 * @param {number} endPos end position
 * @param {boolean} selfClosing is self closing
 * @returns {number} next position
 */
const emitOpenTag = (endPos, selfClosing) => {
	let nextPos = endPos;
	if (callbacks.openTag !== undefined) {
		nextPos = callbacks.openTag(
			input,
			tagStart,
			endPos,
			tagNameStart,
			tagNameEnd,
			selfClosing
		);
	}
	if (!selfClosing) {
		lastOpenTagName = input.slice(tagNameStart, tagNameEnd).toLowerCase();
	}
	textStart = nextPos;
	return nextPos;
};
|
|
292
|
+
|
|
293
|
+
/**
 * Reports the current end tag through `callbacks.closeTag` and prepares the
 * tokenizer to continue after it.
 *
 * The callback's return value (or `endPos` when no callback was supplied)
 * becomes both the start of the next text run and the returned position.
 * @param {number} endPos end position
 * @returns {number} next position
 */
const emitCloseTag = (endPos) => {
	let nextPos = endPos;
	if (callbacks.closeTag !== undefined) {
		nextPos = callbacks.closeTag(
			input,
			tagStart,
			endPos,
			tagNameStart,
			tagNameEnd
		);
	}
	textStart = nextPos;
	return nextPos;
};
|
|
311
|
+
|
|
312
|
+
while (pos < len) {
|
|
313
|
+
const cc = input.charCodeAt(pos);
|
|
314
|
+
|
|
315
|
+
// TODO: We don't handle all states here yet. In the future we will need to handle
|
|
316
|
+
// all of them, and when we move all the tokenizer we will remove it.
|
|
317
|
+
switch (state) {
|
|
318
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#data-state
|
|
319
|
+
case STATE_DATA:
|
|
320
|
+
// Consume the next input character:
|
|
321
|
+
// U+003C LESS-THAN SIGN (<)
|
|
322
|
+
// Set the return state to the data state. Switch to the tag open state.
|
|
323
|
+
if (cc === CC_LESS_THAN) {
|
|
324
|
+
tagStart = pos;
|
|
325
|
+
state = STATE_TAG_OPEN;
|
|
326
|
+
pos++;
|
|
327
|
+
} else if (cc === CC_AMPERSAND) {
|
|
328
|
+
// U+0026 AMPERSAND (&)
|
|
329
|
+
// Set the return state to the data state. Switch to the
|
|
330
|
+
// character reference state.
|
|
331
|
+
returnState = STATE_DATA;
|
|
332
|
+
state = STATE_CHARACTER_REFERENCE;
|
|
333
|
+
pos++;
|
|
334
|
+
} else {
|
|
335
|
+
pos++;
|
|
336
|
+
}
|
|
337
|
+
break;
|
|
338
|
+
|
|
339
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
|
|
340
|
+
case STATE_TAG_OPEN:
|
|
341
|
+
// Consume the next input character:
|
|
342
|
+
// U+002F SOLIDUS (/)
|
|
343
|
+
// Switch to the end tag open state.
|
|
344
|
+
if (cc === CC_SOLIDUS) {
|
|
345
|
+
state = STATE_END_TAG_OPEN;
|
|
346
|
+
pos++;
|
|
347
|
+
} else if (cc === CC_EXCLAMATION_MARK) {
|
|
348
|
+
// U+0021 EXCLAMATION MARK (!)
|
|
349
|
+
// Switch to the markup declaration open state.
|
|
350
|
+
flushText(tagStart);
|
|
351
|
+
state = STATE_MARKUP_DECLARATION_OPEN;
|
|
352
|
+
pos++;
|
|
353
|
+
} else if (isAsciiAlpha(cc)) {
|
|
354
|
+
// ASCII alpha
|
|
355
|
+
// Create a new start tag token, set its tag name to the empty string.
|
|
356
|
+
// Reconsume in the tag name state.
|
|
357
|
+
flushText(tagStart);
|
|
358
|
+
tagNameStart = pos;
|
|
359
|
+
state = STATE_TAG_NAME;
|
|
360
|
+
// Reconsume
|
|
361
|
+
} else if (cc === CC_QUESTION_MARK) {
|
|
362
|
+
// U+003F QUESTION MARK (?)
|
|
363
|
+
// This is an unexpected-question-mark-instead-of-tag-name parse error.
|
|
364
|
+
// Create a comment token whose data is the empty string. Reconsume in the
|
|
365
|
+
// bogus comment state.
|
|
366
|
+
flushText(tagStart);
|
|
367
|
+
commentStart = tagStart;
|
|
368
|
+
state = STATE_BOGUS_COMMENT;
|
|
369
|
+
pos++;
|
|
370
|
+
// Anything else
|
|
371
|
+
// This is an invalid-first-character-of-tag-name parse error. Emit a U+003C
|
|
372
|
+
// LESS-THAN SIGN character token. Reconsume in the data state.
|
|
373
|
+
} else {
|
|
374
|
+
state = STATE_DATA;
|
|
375
|
+
// Reconsume
|
|
376
|
+
}
|
|
377
|
+
break;
|
|
378
|
+
|
|
379
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#end-tag-open-state
|
|
380
|
+
case STATE_END_TAG_OPEN:
|
|
381
|
+
// Consume the next input character:
|
|
382
|
+
// ASCII alpha
|
|
383
|
+
// Create a new end tag token, set its tag name to the empty string.
|
|
384
|
+
// Reconsume in the tag name state.
|
|
385
|
+
if (isAsciiAlpha(cc)) {
|
|
386
|
+
flushText(tagStart);
|
|
387
|
+
tagNameStart = pos;
|
|
388
|
+
state = STATE_TAG_NAME;
|
|
389
|
+
// Reconsume
|
|
390
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
391
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
392
|
+
// This is a missing-end-tag-name parse error. Switch to the data state.
|
|
393
|
+
state = STATE_DATA;
|
|
394
|
+
pos++;
|
|
395
|
+
} else {
|
|
396
|
+
// Anything else
|
|
397
|
+
// This is an invalid-first-character-of-tag-name parse error. Create a
|
|
398
|
+
// comment token whose data is the empty string. Reconsume in the bogus
|
|
399
|
+
// comment state.
|
|
400
|
+
flushText(tagStart);
|
|
401
|
+
commentStart = tagStart;
|
|
402
|
+
state = STATE_BOGUS_COMMENT;
|
|
403
|
+
pos++;
|
|
404
|
+
}
|
|
405
|
+
break;
|
|
406
|
+
|
|
407
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state
|
|
408
|
+
case STATE_TAG_NAME:
|
|
409
|
+
// Consume the next input character:
|
|
410
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
411
|
+
// U+000A LINE FEED (LF)
|
|
412
|
+
// U+000C FORM FEED (FF)
|
|
413
|
+
// U+0020 SPACE
|
|
414
|
+
// Switch to the before attribute name state.
|
|
415
|
+
if (isSpace(cc)) {
|
|
416
|
+
tagNameEnd = pos;
|
|
417
|
+
state = STATE_BEFORE_ATTRIBUTE_NAME;
|
|
418
|
+
pos++;
|
|
419
|
+
} else if (cc === CC_SOLIDUS) {
|
|
420
|
+
// U+002F SOLIDUS (/)
|
|
421
|
+
// Switch to the self-closing start tag state.
|
|
422
|
+
tagNameEnd = pos;
|
|
423
|
+
state = STATE_SELF_CLOSING_START_TAG;
|
|
424
|
+
pos++;
|
|
425
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
426
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
427
|
+
// Switch to the data state. Emit the current tag token.
|
|
428
|
+
tagNameEnd = pos;
|
|
429
|
+
if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
|
|
430
|
+
state = STATE_DATA;
|
|
431
|
+
pos = emitCloseTag(pos + 1);
|
|
432
|
+
} else {
|
|
433
|
+
const nextPos = emitOpenTag(pos + 1, false);
|
|
434
|
+
state =
|
|
435
|
+
nextPos > pos + 1
|
|
436
|
+
? STATE_DATA
|
|
437
|
+
: getContentModeForTag(lastOpenTagName);
|
|
438
|
+
pos = nextPos;
|
|
439
|
+
}
|
|
440
|
+
} else {
|
|
441
|
+
pos++;
|
|
442
|
+
}
|
|
443
|
+
break;
|
|
444
|
+
|
|
445
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
|
|
446
|
+
case STATE_BEFORE_ATTRIBUTE_NAME:
|
|
447
|
+
// Consume the next input character:
|
|
448
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
449
|
+
// U+000A LINE FEED (LF)
|
|
450
|
+
// U+000C FORM FEED (FF)
|
|
451
|
+
// U+0020 SPACE
|
|
452
|
+
// Ignore the character.
|
|
453
|
+
// Reconsume so space is handled in BEFORE_ATTRIBUTE_NAME
|
|
454
|
+
if (isSpace(cc)) {
|
|
455
|
+
pos++;
|
|
456
|
+
} else if (cc === CC_SOLIDUS || cc === CC_GREATER_THAN) {
|
|
457
|
+
// U+002F SOLIDUS (/)
|
|
458
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
459
|
+
// EOF
|
|
460
|
+
// Reconsume in the after attribute name state.
|
|
461
|
+
state = STATE_AFTER_ATTRIBUTE_NAME;
|
|
462
|
+
// Reconsume
|
|
463
|
+
} else if (cc === CC_EQUALS) {
|
|
464
|
+
attrNameStart = pos;
|
|
465
|
+
state = STATE_ATTRIBUTE_NAME;
|
|
466
|
+
pos++;
|
|
467
|
+
} else {
|
|
468
|
+
// Anything else
|
|
469
|
+
// Start a new attribute in the current tag token. Set that attribute name
|
|
470
|
+
// and value to the empty string. Reconsume in the attribute name state.
|
|
471
|
+
attrNameStart = pos;
|
|
472
|
+
state = STATE_ATTRIBUTE_NAME;
|
|
473
|
+
// Reconsume
|
|
474
|
+
}
|
|
475
|
+
break;
|
|
476
|
+
|
|
477
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
|
|
478
|
+
case STATE_ATTRIBUTE_NAME:
|
|
479
|
+
// Consume the next input character:
|
|
480
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
481
|
+
// U+000A LINE FEED (LF)
|
|
482
|
+
// U+000C FORM FEED (FF)
|
|
483
|
+
// U+0020 SPACE
|
|
484
|
+
// U+002F SOLIDUS (/)
|
|
485
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
486
|
+
// EOF
|
|
487
|
+
// Reconsume in the after attribute name state.
|
|
488
|
+
if (isSpace(cc) || cc === CC_SOLIDUS || cc === CC_GREATER_THAN) {
|
|
489
|
+
attrNameEnd = pos;
|
|
490
|
+
state = STATE_AFTER_ATTRIBUTE_NAME;
|
|
491
|
+
// Reconsume
|
|
492
|
+
} else if (cc === CC_EQUALS) {
|
|
493
|
+
attrNameEnd = pos;
|
|
494
|
+
state = STATE_BEFORE_ATTRIBUTE_VALUE;
|
|
495
|
+
pos++;
|
|
496
|
+
} else {
|
|
497
|
+
pos++;
|
|
498
|
+
}
|
|
499
|
+
break;
|
|
500
|
+
|
|
501
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-name-state
|
|
502
|
+
case STATE_AFTER_ATTRIBUTE_NAME:
|
|
503
|
+
// Consume the next input character:
|
|
504
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
505
|
+
// U+000A LINE FEED (LF)
|
|
506
|
+
// U+000C FORM FEED (FF)
|
|
507
|
+
// U+0020 SPACE
|
|
508
|
+
// Ignore the character.
|
|
509
|
+
if (isSpace(cc)) {
|
|
510
|
+
pos++;
|
|
511
|
+
} else if (cc === CC_SOLIDUS) {
|
|
512
|
+
// U+002F SOLIDUS (/)
|
|
513
|
+
// Switch to the self-closing start tag state.
|
|
514
|
+
emitAttribute(pos);
|
|
515
|
+
state = STATE_SELF_CLOSING_START_TAG;
|
|
516
|
+
pos++;
|
|
517
|
+
} else if (cc === CC_EQUALS) {
|
|
518
|
+
// U+003D EQUALS SIGN (=)
|
|
519
|
+
// Switch to the before attribute value state.
|
|
520
|
+
state = STATE_BEFORE_ATTRIBUTE_VALUE;
|
|
521
|
+
pos++;
|
|
522
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
523
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
524
|
+
// Switch to the data state. Emit the current tag token.
|
|
525
|
+
emitAttribute(pos);
|
|
526
|
+
if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
|
|
527
|
+
state = STATE_DATA;
|
|
528
|
+
pos = emitCloseTag(pos + 1);
|
|
529
|
+
} else {
|
|
530
|
+
const nextPos = emitOpenTag(pos + 1, false);
|
|
531
|
+
state =
|
|
532
|
+
nextPos > pos + 1
|
|
533
|
+
? STATE_DATA
|
|
534
|
+
: getContentModeForTag(lastOpenTagName);
|
|
535
|
+
pos = nextPos;
|
|
536
|
+
}
|
|
537
|
+
} else {
|
|
538
|
+
// Anything else
|
|
539
|
+
// Start a new attribute in the current tag token.
|
|
540
|
+
emitAttribute(pos);
|
|
541
|
+
attrNameStart = pos;
|
|
542
|
+
state = STATE_ATTRIBUTE_NAME;
|
|
543
|
+
// Reconsume
|
|
544
|
+
}
|
|
545
|
+
break;
|
|
546
|
+
|
|
547
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-value-state
|
|
548
|
+
case STATE_BEFORE_ATTRIBUTE_VALUE:
|
|
549
|
+
// Consume the next input character:
|
|
550
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
551
|
+
// U+000A LINE FEED (LF)
|
|
552
|
+
// U+000C FORM FEED (FF)
|
|
553
|
+
// U+0020 SPACE
|
|
554
|
+
// Ignore the character.
|
|
555
|
+
if (isSpace(cc)) {
|
|
556
|
+
pos++;
|
|
557
|
+
} else if (cc === CC_QUOTATION_MARK) {
|
|
558
|
+
// U+0022 QUOTATION MARK (")
|
|
559
|
+
// Switch to the attribute value (double-quoted) state.
|
|
560
|
+
attrValueStart = pos + 1;
|
|
561
|
+
attrQuoteType = QUOTE_DOUBLE;
|
|
562
|
+
state = STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED;
|
|
563
|
+
pos++;
|
|
564
|
+
} else if (cc === CC_APOSTROPHE) {
|
|
565
|
+
// U+0027 APOSTROPHE (')
|
|
566
|
+
// Switch to the attribute value (single-quoted) state.
|
|
567
|
+
attrValueStart = pos + 1;
|
|
568
|
+
attrQuoteType = QUOTE_SINGLE;
|
|
569
|
+
state = STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED;
|
|
570
|
+
pos++;
|
|
571
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
572
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
573
|
+
// Switch to the data state. Emit the current tag token.
|
|
574
|
+
pos = emitAttribute(pos);
|
|
575
|
+
if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
|
|
576
|
+
state = STATE_DATA;
|
|
577
|
+
pos = emitCloseTag(pos + 1);
|
|
578
|
+
} else {
|
|
579
|
+
const nextPos = emitOpenTag(pos + 1, false);
|
|
580
|
+
state =
|
|
581
|
+
nextPos > pos + 1
|
|
582
|
+
? STATE_DATA
|
|
583
|
+
: getContentModeForTag(lastOpenTagName);
|
|
584
|
+
pos = nextPos;
|
|
585
|
+
}
|
|
586
|
+
} else {
|
|
587
|
+
// Anything else
|
|
588
|
+
// Reconsume in the attribute value (unquoted) state.
|
|
589
|
+
attrValueStart = pos;
|
|
590
|
+
attrQuoteType = QUOTE_NONE;
|
|
591
|
+
state = STATE_ATTRIBUTE_VALUE_UNQUOTED;
|
|
592
|
+
// Reconsume
|
|
593
|
+
}
|
|
594
|
+
break;
|
|
595
|
+
|
|
596
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(double-quoted)-state
|
|
597
|
+
case STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED:
|
|
598
|
+
// Consume the next input character:
|
|
599
|
+
// U+0022 QUOTATION MARK (")
|
|
600
|
+
// Switch to the after attribute value (quoted) state.
|
|
601
|
+
if (cc === CC_QUOTATION_MARK) {
|
|
602
|
+
pos = emitAttribute(pos);
|
|
603
|
+
state = STATE_AFTER_ATTRIBUTE_VALUE_QUOTED;
|
|
604
|
+
} else if (cc === CC_AMPERSAND) {
|
|
605
|
+
// U+0026 AMPERSAND (&)
|
|
606
|
+
// Set the return state to the attribute value (double-quoted)
|
|
607
|
+
// state. Switch to the character reference state.
|
|
608
|
+
returnState = STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED;
|
|
609
|
+
state = STATE_CHARACTER_REFERENCE;
|
|
610
|
+
pos++;
|
|
611
|
+
} else {
|
|
612
|
+
pos++;
|
|
613
|
+
}
|
|
614
|
+
break;
|
|
615
|
+
|
|
616
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(single-quoted)-state
|
|
617
|
+
case STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED:
|
|
618
|
+
// Consume the next input character:
|
|
619
|
+
// U+0027 APOSTROPHE (')
|
|
620
|
+
// Switch to the after attribute value (quoted) state.
|
|
621
|
+
if (cc === CC_APOSTROPHE) {
|
|
622
|
+
pos = emitAttribute(pos);
|
|
623
|
+
state = STATE_AFTER_ATTRIBUTE_VALUE_QUOTED;
|
|
624
|
+
} else if (cc === CC_AMPERSAND) {
|
|
625
|
+
// U+0026 AMPERSAND (&)
|
|
626
|
+
// Set the return state to the attribute value (single-quoted)
|
|
627
|
+
// state. Switch to the character reference state.
|
|
628
|
+
returnState = STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED;
|
|
629
|
+
state = STATE_CHARACTER_REFERENCE;
|
|
630
|
+
pos++;
|
|
631
|
+
} else {
|
|
632
|
+
pos++;
|
|
633
|
+
}
|
|
634
|
+
break;
|
|
635
|
+
|
|
636
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(unquoted)-state
|
|
637
|
+
case STATE_ATTRIBUTE_VALUE_UNQUOTED:
|
|
638
|
+
if (isSpace(cc)) {
|
|
639
|
+
pos = emitAttribute(pos);
|
|
640
|
+
state = STATE_BEFORE_ATTRIBUTE_NAME;
|
|
641
|
+
// Reconsume so space is handled in BEFORE_ATTRIBUTE_NAME
|
|
642
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
643
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
644
|
+
// This is a missing-attribute-value parse error. Switch to the data state.
|
|
645
|
+
// Emit the current tag token.
|
|
646
|
+
pos = emitAttribute(pos);
|
|
647
|
+
if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
|
|
648
|
+
state = STATE_DATA;
|
|
649
|
+
pos = emitCloseTag(pos + 1);
|
|
650
|
+
} else {
|
|
651
|
+
const nextPos = emitOpenTag(pos + 1, false);
|
|
652
|
+
state =
|
|
653
|
+
nextPos > pos + 1
|
|
654
|
+
? STATE_DATA
|
|
655
|
+
: getContentModeForTag(lastOpenTagName);
|
|
656
|
+
pos = nextPos;
|
|
657
|
+
}
|
|
658
|
+
} else if (cc === CC_AMPERSAND) {
|
|
659
|
+
// U+0026 AMPERSAND (&)
|
|
660
|
+
// Set the return state to the attribute value (unquoted)
|
|
661
|
+
// state. Switch to the character reference state.
|
|
662
|
+
returnState = STATE_ATTRIBUTE_VALUE_UNQUOTED;
|
|
663
|
+
state = STATE_CHARACTER_REFERENCE;
|
|
664
|
+
pos++;
|
|
665
|
+
} else {
|
|
666
|
+
pos++;
|
|
667
|
+
}
|
|
668
|
+
break;
|
|
669
|
+
|
|
670
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-value-(quoted)-state
|
|
671
|
+
case STATE_AFTER_ATTRIBUTE_VALUE_QUOTED:
|
|
672
|
+
// Consume the next input character:
|
|
673
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
674
|
+
// U+000A LINE FEED (LF)
|
|
675
|
+
// U+000C FORM FEED (FF)
|
|
676
|
+
// U+0020 SPACE
|
|
677
|
+
// Switch to the before attribute name state.
|
|
678
|
+
if (isSpace(cc)) {
|
|
679
|
+
state = STATE_BEFORE_ATTRIBUTE_NAME;
|
|
680
|
+
pos++;
|
|
681
|
+
} else if (cc === CC_SOLIDUS) {
|
|
682
|
+
// U+002F SOLIDUS (/)
|
|
683
|
+
// Switch to the self-closing start tag state.
|
|
684
|
+
state = STATE_SELF_CLOSING_START_TAG;
|
|
685
|
+
pos++;
|
|
686
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
687
|
+
if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
|
|
688
|
+
state = STATE_DATA;
|
|
689
|
+
pos = emitCloseTag(pos + 1);
|
|
690
|
+
} else {
|
|
691
|
+
const nextPos = emitOpenTag(pos + 1, false);
|
|
692
|
+
state =
|
|
693
|
+
nextPos > pos + 1
|
|
694
|
+
? STATE_DATA
|
|
695
|
+
: getContentModeForTag(lastOpenTagName);
|
|
696
|
+
pos = nextPos;
|
|
697
|
+
}
|
|
698
|
+
} else {
|
|
699
|
+
// Anything else
|
|
700
|
+
// This is a missing-whitespace-between-attributes parse error. Reconsume in
|
|
701
|
+
// the before attribute name state.
|
|
702
|
+
state = STATE_BEFORE_ATTRIBUTE_NAME;
|
|
703
|
+
// Reconsume
|
|
704
|
+
}
|
|
705
|
+
break;
|
|
706
|
+
|
|
707
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#self-closing-start-tag-state
|
|
708
|
+
case STATE_SELF_CLOSING_START_TAG:
|
|
709
|
+
// Consume the next input character:
|
|
710
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
711
|
+
// Set the self-closing flag of the current tag token. Switch to the data
|
|
712
|
+
// state. Emit the current tag token.
|
|
713
|
+
if (cc === CC_GREATER_THAN) {
|
|
714
|
+
if (input.charCodeAt(tagStart + 1) === CC_SOLIDUS) {
|
|
715
|
+
state = STATE_DATA;
|
|
716
|
+
pos = emitCloseTag(pos + 1);
|
|
717
|
+
} else {
|
|
718
|
+
pos = emitOpenTag(pos + 1, true);
|
|
719
|
+
state = STATE_DATA;
|
|
720
|
+
}
|
|
721
|
+
} else {
|
|
722
|
+
// Anything else
|
|
723
|
+
// This is an unexpected-solidus-in-tag parse error. Reconsume in the before
|
|
724
|
+
// attribute name state.
|
|
725
|
+
state = STATE_BEFORE_ATTRIBUTE_NAME;
|
|
726
|
+
// Reconsume
|
|
727
|
+
}
|
|
728
|
+
break;
|
|
729
|
+
|
|
730
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
|
|
731
|
+
case STATE_MARKUP_DECLARATION_OPEN:
|
|
732
|
+
// If the next few characters are:
|
|
733
|
+
// Two U+002D HYPHEN-MINUS characters (-)
|
|
734
|
+
// Consume those two characters, create a comment token whose data
|
|
735
|
+
// is the empty string, and switch to the comment start state.
|
|
736
|
+
if (
|
|
737
|
+
cc === CC_HYPHEN_MINUS &&
|
|
738
|
+
input.charCodeAt(pos + 1) === CC_HYPHEN_MINUS
|
|
739
|
+
) {
|
|
740
|
+
pos += 2;
|
|
741
|
+
commentStart = tagStart;
|
|
742
|
+
state = STATE_COMMENT_START;
|
|
743
|
+
} else if (
|
|
744
|
+
// ASCII case-insensitive match for the word "DOCTYPE"
|
|
745
|
+
// Consume those characters and switch to the DOCTYPE state.
|
|
746
|
+
(cc === 0x44 || cc === 0x64) /* D or d */ &&
|
|
747
|
+
(input.charCodeAt(pos + 1) | 0x20) === 0x6f /* o */ &&
|
|
748
|
+
(input.charCodeAt(pos + 2) | 0x20) === 0x63 /* c */ &&
|
|
749
|
+
(input.charCodeAt(pos + 3) | 0x20) === 0x74 /* t */ &&
|
|
750
|
+
(input.charCodeAt(pos + 4) | 0x20) === 0x79 /* y */ &&
|
|
751
|
+
(input.charCodeAt(pos + 5) | 0x20) === 0x70 /* p */ &&
|
|
752
|
+
(input.charCodeAt(pos + 6) | 0x20) === 0x65 /* e */
|
|
753
|
+
) {
|
|
754
|
+
pos += 7;
|
|
755
|
+
commentStart = tagStart;
|
|
756
|
+
state = STATE_DOCTYPE;
|
|
757
|
+
} else if (
|
|
758
|
+
// The string "[CDATA[" (the five uppercase letters "CDATA" with a
|
|
759
|
+
// U+005B LEFT SQUARE BRACKET character before and after)
|
|
760
|
+
// Consume those characters and switch to the CDATA section state.
|
|
761
|
+
cc === CC_LEFT_SQUARE_BRACKET &&
|
|
762
|
+
input.charCodeAt(pos + 1) === 0x43 /* C */ &&
|
|
763
|
+
input.charCodeAt(pos + 2) === 0x44 /* D */ &&
|
|
764
|
+
input.charCodeAt(pos + 3) === 0x41 /* A */ &&
|
|
765
|
+
input.charCodeAt(pos + 4) === 0x54 /* T */ &&
|
|
766
|
+
input.charCodeAt(pos + 5) === 0x41 /* A */ &&
|
|
767
|
+
input.charCodeAt(pos + 6) === CC_LEFT_SQUARE_BRACKET
|
|
768
|
+
) {
|
|
769
|
+
pos += 7;
|
|
770
|
+
commentStart = tagStart;
|
|
771
|
+
state = STATE_CDATA_SECTION;
|
|
772
|
+
} else {
|
|
773
|
+
// Anything else
|
|
774
|
+
// This is an incorrectly-opened-comment parse error. Create a comment token
|
|
775
|
+
// whose data is the empty string. Switch to the bogus comment state (don't
|
|
776
|
+
// consume anything in the current state).
|
|
777
|
+
commentStart = tagStart;
|
|
778
|
+
state = STATE_BOGUS_COMMENT;
|
|
779
|
+
// Reconsume
|
|
780
|
+
}
|
|
781
|
+
break;
|
|
782
|
+
|
|
783
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state
|
|
784
|
+
case STATE_COMMENT_START:
|
|
785
|
+
// Consume the next input character:
|
|
786
|
+
// U+002D HYPHEN-MINUS (-)
|
|
787
|
+
// Switch to the comment start dash state.
|
|
788
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
789
|
+
state = STATE_COMMENT_START_DASH;
|
|
790
|
+
pos++;
|
|
791
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
792
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
793
|
+
// This is an abrupt-closing-of-empty-comment parse error. Switch to the
|
|
794
|
+
// data state. Emit the current comment token.
|
|
795
|
+
let nextPos = pos + 1;
|
|
796
|
+
if (callbacks.comment !== undefined) {
|
|
797
|
+
nextPos = callbacks.comment(input, commentStart, pos + 1);
|
|
798
|
+
}
|
|
799
|
+
state = STATE_DATA;
|
|
800
|
+
textStart = nextPos;
|
|
801
|
+
pos = nextPos;
|
|
802
|
+
} else {
|
|
803
|
+
// Anything else
|
|
804
|
+
// Reconsume in the comment state.
|
|
805
|
+
state = STATE_COMMENT;
|
|
806
|
+
pos++;
|
|
807
|
+
}
|
|
808
|
+
break;
|
|
809
|
+
|
|
810
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-start-dash-state
|
|
811
|
+
case STATE_COMMENT_START_DASH:
|
|
812
|
+
// Consume the next input character:
|
|
813
|
+
// U+002D HYPHEN-MINUS (-)
|
|
814
|
+
// Switch to the comment end state.
|
|
815
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
816
|
+
state = STATE_COMMENT_END;
|
|
817
|
+
pos++;
|
|
818
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
819
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
820
|
+
// This is an abrupt-closing-of-empty-comment parse error. Switch to the
|
|
821
|
+
// data state. Emit the current comment token.
|
|
822
|
+
let nextPos = pos + 1;
|
|
823
|
+
if (callbacks.comment !== undefined) {
|
|
824
|
+
nextPos = callbacks.comment(input, commentStart, pos + 1);
|
|
825
|
+
}
|
|
826
|
+
state = STATE_DATA;
|
|
827
|
+
textStart = nextPos;
|
|
828
|
+
pos = nextPos;
|
|
829
|
+
} else {
|
|
830
|
+
// Anything else
|
|
831
|
+
// Append a U+002D HYPHEN-MINUS character (-) to the comment token's data.
|
|
832
|
+
// Reconsume in the comment state.
|
|
833
|
+
state = STATE_COMMENT;
|
|
834
|
+
pos++;
|
|
835
|
+
}
|
|
836
|
+
break;
|
|
837
|
+
|
|
838
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-state
|
|
839
|
+
case STATE_COMMENT:
|
|
840
|
+
// Consume the next input character:
|
|
841
|
+
// U+003C LESS-THAN SIGN (<)
|
|
842
|
+
// Append a U+003C LESS-THAN SIGN character to the comment token's data. Switch to the comment less-than sign state.
|
|
843
|
+
if (cc === CC_LESS_THAN) {
|
|
844
|
+
state = STATE_COMMENT_LESS_THAN_SIGN;
|
|
845
|
+
pos++;
|
|
846
|
+
} else if (cc === CC_HYPHEN_MINUS) {
|
|
847
|
+
// Consume the next input character:
|
|
848
|
+
// U+002D HYPHEN-MINUS (-)
|
|
849
|
+
// Switch to the comment end dash state.
|
|
850
|
+
state = STATE_COMMENT_END_DASH;
|
|
851
|
+
pos++;
|
|
852
|
+
} else {
|
|
853
|
+
pos++;
|
|
854
|
+
}
|
|
855
|
+
break;
|
|
856
|
+
|
|
857
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-end-dash-state
|
|
858
|
+
case STATE_COMMENT_END_DASH:
|
|
859
|
+
// Consume the next input character:
|
|
860
|
+
// U+002D HYPHEN-MINUS (-)
|
|
861
|
+
// Switch to the comment end state.
|
|
862
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
863
|
+
state = STATE_COMMENT_END;
|
|
864
|
+
pos++;
|
|
865
|
+
} else {
|
|
866
|
+
// Anything else
|
|
867
|
+
// Append a U+002D HYPHEN-MINUS character (-) to the comment token's data.
|
|
868
|
+
// Reconsume in the comment state.
|
|
869
|
+
state = STATE_COMMENT;
|
|
870
|
+
pos++;
|
|
871
|
+
}
|
|
872
|
+
break;
|
|
873
|
+
|
|
874
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-end-state
|
|
875
|
+
case STATE_COMMENT_END:
|
|
876
|
+
// Consume the next input character:
|
|
877
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
878
|
+
// Switch to the data state. Emit the current comment token.
|
|
879
|
+
if (cc === CC_GREATER_THAN) {
|
|
880
|
+
let nextPos = pos + 1;
|
|
881
|
+
if (callbacks.comment !== undefined) {
|
|
882
|
+
nextPos = callbacks.comment(input, commentStart, pos + 1);
|
|
883
|
+
}
|
|
884
|
+
state = STATE_DATA;
|
|
885
|
+
textStart = nextPos;
|
|
886
|
+
pos = nextPos;
|
|
887
|
+
} else if (cc === CC_EXCLAMATION_MARK) {
|
|
888
|
+
// U+0021 EXCLAMATION MARK (!)
|
|
889
|
+
// Switch to the comment end bang state.
|
|
890
|
+
state = STATE_COMMENT_END_BANG;
|
|
891
|
+
pos++;
|
|
892
|
+
} else if (cc === CC_HYPHEN_MINUS) {
|
|
893
|
+
pos++;
|
|
894
|
+
} else {
|
|
895
|
+
// Anything else
|
|
896
|
+
// Append two U+002D HYPHEN-MINUS characters (-) to the comment token's
|
|
897
|
+
// data. Reconsume in the comment state.
|
|
898
|
+
state = STATE_COMMENT;
|
|
899
|
+
pos++;
|
|
900
|
+
}
|
|
901
|
+
break;
|
|
902
|
+
|
|
903
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-end-bang-state
|
|
904
|
+
case STATE_COMMENT_END_BANG:
|
|
905
|
+
// Consume the next input character:
|
|
906
|
+
// U+002D HYPHEN-MINUS (-)
|
|
907
|
+
// Append two U+002D HYPHEN-MINUS characters (-) and a U+0021 EXCLAMATION
|
|
908
|
+
// MARK character (!) to the comment token's data. Switch to the comment end
|
|
909
|
+
// dash state.
|
|
910
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
911
|
+
state = STATE_COMMENT_END_DASH;
|
|
912
|
+
pos++;
|
|
913
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
914
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
915
|
+
// This is an incorrectly-closed-comment parse error. Switch to the data
|
|
916
|
+
// state. Emit the current comment token.
|
|
917
|
+
let nextPos = pos + 1;
|
|
918
|
+
if (callbacks.comment !== undefined) {
|
|
919
|
+
nextPos = callbacks.comment(input, commentStart, pos + 1);
|
|
920
|
+
}
|
|
921
|
+
state = STATE_DATA;
|
|
922
|
+
textStart = nextPos;
|
|
923
|
+
pos = nextPos;
|
|
924
|
+
} else {
|
|
925
|
+
// Anything else
|
|
926
|
+
// Append two U+002D HYPHEN-MINUS characters (-) and a U+0021 EXCLAMATION
|
|
927
|
+
// MARK character (!) to the comment token's data. Reconsume in the comment
|
|
928
|
+
// state.
|
|
929
|
+
state = STATE_COMMENT;
|
|
930
|
+
pos++;
|
|
931
|
+
}
|
|
932
|
+
break;
|
|
933
|
+
|
|
934
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state
|
|
935
|
+
case STATE_BOGUS_COMMENT:
|
|
936
|
+
// Consume the next input character:
|
|
937
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
938
|
+
// Switch to the data state. Emit the current comment token.
|
|
939
|
+
if (cc === CC_GREATER_THAN) {
|
|
940
|
+
let nextPos = pos + 1;
|
|
941
|
+
if (callbacks.comment !== undefined) {
|
|
942
|
+
nextPos = callbacks.comment(input, commentStart, pos + 1);
|
|
943
|
+
}
|
|
944
|
+
state = STATE_DATA;
|
|
945
|
+
textStart = nextPos;
|
|
946
|
+
pos = nextPos;
|
|
947
|
+
} else {
|
|
948
|
+
pos++;
|
|
949
|
+
}
|
|
950
|
+
break;
|
|
951
|
+
|
|
952
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-state
|
|
953
|
+
case STATE_COMMENT_LESS_THAN_SIGN:
|
|
954
|
+
// Consume the next input character:
|
|
955
|
+
// U+0021 EXCLAMATION MARK (!)
|
|
956
|
+
// Append the current input character to the comment token's data. Switch to
|
|
957
|
+
// the comment less-than sign bang state.
|
|
958
|
+
if (cc === CC_EXCLAMATION_MARK) {
|
|
959
|
+
state = STATE_COMMENT_LESS_THAN_SIGN_BANG;
|
|
960
|
+
pos++;
|
|
961
|
+
} else if (cc === CC_LESS_THAN) {
|
|
962
|
+
// U+003C LESS-THAN SIGN (<)
|
|
963
|
+
// Append the current input character to the comment token's data.
|
|
964
|
+
pos++;
|
|
965
|
+
} else {
|
|
966
|
+
// Anything else
|
|
967
|
+
// Reconsume in the comment state.
|
|
968
|
+
state = STATE_COMMENT;
|
|
969
|
+
// Reconsume
|
|
970
|
+
}
|
|
971
|
+
break;
|
|
972
|
+
|
|
973
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-bang-state
|
|
974
|
+
case STATE_COMMENT_LESS_THAN_SIGN_BANG:
|
|
975
|
+
// Consume the next input character:
|
|
976
|
+
// U+002D HYPHEN-MINUS (-)
|
|
977
|
+
// Switch to the comment less-than sign bang dash state.
|
|
978
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
979
|
+
state = STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH;
|
|
980
|
+
pos++;
|
|
981
|
+
} else {
|
|
982
|
+
// Anything else
|
|
983
|
+
// Reconsume in the comment state.
|
|
984
|
+
state = STATE_COMMENT;
|
|
985
|
+
// Reconsume
|
|
986
|
+
}
|
|
987
|
+
break;
|
|
988
|
+
|
|
989
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-bang-dash-state
|
|
990
|
+
case STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH:
|
|
991
|
+
// Consume the next input character:
|
|
992
|
+
// U+002D HYPHEN-MINUS (-)
|
|
993
|
+
// Switch to the comment less-than sign bang dash dash state.
|
|
994
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
995
|
+
state = STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH;
|
|
996
|
+
pos++;
|
|
997
|
+
} else {
|
|
998
|
+
// Anything else
|
|
999
|
+
// Reconsume in the comment end dash state.
|
|
1000
|
+
state = STATE_COMMENT_END_DASH;
|
|
1001
|
+
// Reconsume
|
|
1002
|
+
}
|
|
1003
|
+
break;
|
|
1004
|
+
|
|
1005
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-bang-dash-dash-state
|
|
1006
|
+
case STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH:
|
|
1007
|
+
// Consume the next input character:
|
|
1008
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1009
|
+
// EOF
|
|
1010
|
+
// Reconsume in the comment end state.
|
|
1011
|
+
// Anything else
|
|
1012
|
+
// This is a nested-comment parse error. Reconsume in the comment end state.
|
|
1013
|
+
state = STATE_COMMENT_END;
|
|
1014
|
+
// Reconsume
|
|
1015
|
+
break;
|
|
1016
|
+
|
|
1017
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-state
|
|
1018
|
+
case STATE_DOCTYPE:
|
|
1019
|
+
// Consume the next input character:
|
|
1020
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1021
|
+
// U+000A LINE FEED (LF)
|
|
1022
|
+
// U+000C FORM FEED (FF)
|
|
1023
|
+
// U+0020 SPACE
|
|
1024
|
+
// Switch to the before DOCTYPE name state.
|
|
1025
|
+
if (isSpace(cc)) {
|
|
1026
|
+
state = STATE_BEFORE_DOCTYPE_NAME;
|
|
1027
|
+
pos++;
|
|
1028
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1029
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1030
|
+
// Reconsume in the before DOCTYPE name state.
|
|
1031
|
+
state = STATE_BEFORE_DOCTYPE_NAME;
|
|
1032
|
+
} else {
|
|
1033
|
+
// Anything else
|
|
1034
|
+
// This is a missing-whitespace-before-doctype-name parse error. Reconsume
|
|
1035
|
+
// in the before DOCTYPE name state.
|
|
1036
|
+
state = STATE_BEFORE_DOCTYPE_NAME;
|
|
1037
|
+
}
|
|
1038
|
+
break;
|
|
1039
|
+
|
|
1040
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-name-state
|
|
1041
|
+
case STATE_BEFORE_DOCTYPE_NAME:
|
|
1042
|
+
// Consume the next input character:
|
|
1043
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1044
|
+
// U+000A LINE FEED (LF)
|
|
1045
|
+
// U+000C FORM FEED (FF)
|
|
1046
|
+
// U+0020 SPACE
|
|
1047
|
+
// Ignore the character.
|
|
1048
|
+
if (isSpace(cc)) {
|
|
1049
|
+
pos++;
|
|
1050
|
+
} else if (cc === 0x00) {
|
|
1051
|
+
// U+0000 NULL
|
|
1052
|
+
// This is an unexpected-null-character parse error. Create a new DOCTYPE
|
|
1053
|
+
// token. Set the token's name to a U+FFFD REPLACEMENT CHARACTER character.
|
|
1054
|
+
// Switch to the DOCTYPE name state.
|
|
1055
|
+
state = STATE_DOCTYPE_NAME;
|
|
1056
|
+
pos++;
|
|
1057
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1058
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1059
|
+
// This is a missing-doctype-name parse error. Create a new DOCTYPE token.
|
|
1060
|
+
// Set its force-quirks flag to on. Switch to the data state. Emit the
|
|
1061
|
+
// current token.
|
|
1062
|
+
let nextPos = pos + 1;
|
|
1063
|
+
if (callbacks.doctype !== undefined) {
|
|
1064
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1065
|
+
}
|
|
1066
|
+
state = STATE_DATA;
|
|
1067
|
+
textStart = nextPos;
|
|
1068
|
+
pos = nextPos;
|
|
1069
|
+
} else {
|
|
1070
|
+
// ASCII upper alpha
|
|
1071
|
+
// Create a new DOCTYPE token. Set the token's name to the lowercase version
|
|
1072
|
+
// of the current input character (add 0x0020 to the character's code
|
|
1073
|
+
// point). Switch to the DOCTYPE name state.
|
|
1074
|
+
// Anything else
|
|
1075
|
+
// Create a new DOCTYPE token. Set the token's name to the current input
|
|
1076
|
+
// character. Switch to the DOCTYPE name state.
|
|
1077
|
+
state = STATE_DOCTYPE_NAME;
|
|
1078
|
+
pos++;
|
|
1079
|
+
}
|
|
1080
|
+
break;
|
|
1081
|
+
|
|
1082
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-name-state
|
|
1083
|
+
case STATE_DOCTYPE_NAME:
|
|
1084
|
+
// Consume the next input character:
|
|
1085
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1086
|
+
// U+000A LINE FEED (LF)
|
|
1087
|
+
// U+000C FORM FEED (FF)
|
|
1088
|
+
// U+0020 SPACE
|
|
1089
|
+
// Switch to the after DOCTYPE name state.
|
|
1090
|
+
if (isSpace(cc)) {
|
|
1091
|
+
state = STATE_AFTER_DOCTYPE_NAME;
|
|
1092
|
+
pos++;
|
|
1093
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1094
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1095
|
+
// Switch to the data state. Emit the current DOCTYPE token.
|
|
1096
|
+
let nextPos = pos + 1;
|
|
1097
|
+
if (callbacks.doctype !== undefined) {
|
|
1098
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1099
|
+
}
|
|
1100
|
+
state = STATE_DATA;
|
|
1101
|
+
textStart = nextPos;
|
|
1102
|
+
pos = nextPos;
|
|
1103
|
+
} else if (cc === 0x00) {
|
|
1104
|
+
// U+0000 NULL
|
|
1105
|
+
// This is an unexpected-null-character parse error. Append a U+FFFD
|
|
1106
|
+
// REPLACEMENT CHARACTER character to the current DOCTYPE token's name.
|
|
1107
|
+
pos++;
|
|
1108
|
+
} else {
|
|
1109
|
+
// ASCII upper alpha
|
|
1110
|
+
// Append the lowercase version of the current input character (add 0x0020
|
|
1111
|
+
// to the character's code point) to the current DOCTYPE token's name.
|
|
1112
|
+
// Anything else
|
|
1113
|
+
// Append the current input character to the current DOCTYPE token's name.
|
|
1114
|
+
pos++;
|
|
1115
|
+
}
|
|
1116
|
+
break;
|
|
1117
|
+
|
|
1118
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-name-state
|
|
1119
|
+
case STATE_AFTER_DOCTYPE_NAME:
|
|
1120
|
+
// Consume the next input character:
|
|
1121
|
+
if (isSpace(cc)) {
|
|
1122
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1123
|
+
// U+000A LINE FEED (LF)
|
|
1124
|
+
// U+000C FORM FEED (FF)
|
|
1125
|
+
// U+0020 SPACE
|
|
1126
|
+
// Ignore the character.
|
|
1127
|
+
pos++;
|
|
1128
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1129
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1130
|
+
// Switch to the data state. Emit the current DOCTYPE token.
|
|
1131
|
+
let nextPos = pos + 1;
|
|
1132
|
+
if (callbacks.doctype !== undefined) {
|
|
1133
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1134
|
+
}
|
|
1135
|
+
state = STATE_DATA;
|
|
1136
|
+
textStart = nextPos;
|
|
1137
|
+
pos = nextPos;
|
|
1138
|
+
} else if (
|
|
1139
|
+
pos + 5 < len &&
|
|
1140
|
+
(cc === 0x50 || cc === 0x70) /* P or p */ &&
|
|
1141
|
+
(input.charCodeAt(pos + 1) | 0x20) === 0x75 /* u */ &&
|
|
1142
|
+
(input.charCodeAt(pos + 2) | 0x20) === 0x62 /* b */ &&
|
|
1143
|
+
(input.charCodeAt(pos + 3) | 0x20) === 0x6c /* l */ &&
|
|
1144
|
+
(input.charCodeAt(pos + 4) | 0x20) === 0x69 /* i */ &&
|
|
1145
|
+
(input.charCodeAt(pos + 5) | 0x20) === 0x63 /* c */
|
|
1146
|
+
) {
|
|
1147
|
+
// ASCII case-insensitive match for the word "PUBLIC"
|
|
1148
|
+
pos += 6;
|
|
1149
|
+
state = STATE_AFTER_DOCTYPE_PUBLIC_KEYWORD;
|
|
1150
|
+
} else if (
|
|
1151
|
+
pos + 5 < len &&
|
|
1152
|
+
(cc === 0x53 || cc === 0x73) /* S or s */ &&
|
|
1153
|
+
(input.charCodeAt(pos + 1) | 0x20) === 0x79 /* y */ &&
|
|
1154
|
+
(input.charCodeAt(pos + 2) | 0x20) === 0x73 /* s */ &&
|
|
1155
|
+
(input.charCodeAt(pos + 3) | 0x20) === 0x74 /* t */ &&
|
|
1156
|
+
(input.charCodeAt(pos + 4) | 0x20) === 0x65 /* e */ &&
|
|
1157
|
+
(input.charCodeAt(pos + 5) | 0x20) === 0x6d /* m */
|
|
1158
|
+
) {
|
|
1159
|
+
// ASCII case-insensitive match for the word "SYSTEM"
|
|
1160
|
+
pos += 6;
|
|
1161
|
+
state = STATE_AFTER_DOCTYPE_SYSTEM_KEYWORD;
|
|
1162
|
+
} else {
|
|
1163
|
+
// Anything else
|
|
1164
|
+
// This is an invalid-character-sequence-after-doctype-name parse error. Set
|
|
1165
|
+
// the current DOCTYPE token's force-quirks flag to on. Reconsume in the
|
|
1166
|
+
// bogus DOCTYPE state.
|
|
1167
|
+
state = STATE_BOGUS_DOCTYPE;
|
|
1168
|
+
}
|
|
1169
|
+
break;
|
|
1170
|
+
|
|
1171
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-public-keyword-state
|
|
1172
|
+
case STATE_AFTER_DOCTYPE_PUBLIC_KEYWORD:
|
|
1173
|
+
// Consume the next input character:
|
|
1174
|
+
if (isSpace(cc)) {
|
|
1175
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1176
|
+
// U+000A LINE FEED (LF)
|
|
1177
|
+
// U+000C FORM FEED (FF)
|
|
1178
|
+
// U+0020 SPACE
|
|
1179
|
+
// Switch to the before DOCTYPE public identifier state.
|
|
1180
|
+
state = STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
|
|
1181
|
+
pos++;
|
|
1182
|
+
} else if (cc === CC_QUOTATION_MARK) {
|
|
1183
|
+
// U+0022 QUOTATION MARK (")
|
|
1184
|
+
// This is a missing-whitespace-after-doctype-public-keyword parse error.
|
|
1185
|
+
// Set the current DOCTYPE token's public identifier to the empty string
|
|
1186
|
+
// (not missing), then switch to the DOCTYPE public identifier
|
|
1187
|
+
// (double-quoted) state.
|
|
1188
|
+
state = STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
|
|
1189
|
+
pos++;
|
|
1190
|
+
} else if (cc === CC_APOSTROPHE) {
|
|
1191
|
+
// U+0027 APOSTROPHE (')
|
|
1192
|
+
// This is a missing-whitespace-after-doctype-public-keyword parse error.
|
|
1193
|
+
// Set the current DOCTYPE token's public identifier to the empty string
|
|
1194
|
+
// (not missing), then switch to the DOCTYPE public identifier
|
|
1195
|
+
// (single-quoted) state.
|
|
1196
|
+
state = STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
|
|
1197
|
+
pos++;
|
|
1198
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1199
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1200
|
+
// This is a missing-doctype-public-identifier parse error. Set the current
|
|
1201
|
+
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
|
|
1202
|
+
// the current DOCTYPE token.
|
|
1203
|
+
let nextPos = pos + 1;
|
|
1204
|
+
if (callbacks.doctype !== undefined) {
|
|
1205
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1206
|
+
}
|
|
1207
|
+
state = STATE_DATA;
|
|
1208
|
+
textStart = nextPos;
|
|
1209
|
+
pos = nextPos;
|
|
1210
|
+
} else {
|
|
1211
|
+
// Anything else
|
|
1212
|
+
// This is a missing-quote-before-doctype-public-identifier parse error. Set
|
|
1213
|
+
// the current DOCTYPE token's force-quirks flag to on. Reconsume in the
|
|
1214
|
+
// bogus DOCTYPE state.
|
|
1215
|
+
state = STATE_BOGUS_DOCTYPE;
|
|
1216
|
+
}
|
|
1217
|
+
break;
|
|
1218
|
+
|
|
1219
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-public-identifier-state
|
|
1220
|
+
case STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
|
|
1221
|
+
// Consume the next input character:
|
|
1222
|
+
if (isSpace(cc)) {
|
|
1223
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1224
|
+
// U+000A LINE FEED (LF)
|
|
1225
|
+
// U+000C FORM FEED (FF)
|
|
1226
|
+
// U+0020 SPACE
|
|
1227
|
+
// Ignore the character.
|
|
1228
|
+
pos++;
|
|
1229
|
+
} else if (cc === CC_QUOTATION_MARK) {
|
|
1230
|
+
// U+0022 QUOTATION MARK (")
|
|
1231
|
+
// Set the current DOCTYPE token's public identifier to the empty string
|
|
1232
|
+
// (not missing), then switch to the DOCTYPE public identifier
|
|
1233
|
+
// (double-quoted) state.
|
|
1234
|
+
state = STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
|
|
1235
|
+
pos++;
|
|
1236
|
+
} else if (cc === CC_APOSTROPHE) {
|
|
1237
|
+
// U+0027 APOSTROPHE (')
|
|
1238
|
+
// Set the current DOCTYPE token's public identifier to the empty string
|
|
1239
|
+
// (not missing), then switch to the DOCTYPE public identifier
|
|
1240
|
+
// (single-quoted) state.
|
|
1241
|
+
state = STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
|
|
1242
|
+
pos++;
|
|
1243
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1244
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1245
|
+
// This is a missing-doctype-public-identifier parse error. Set the current
|
|
1246
|
+
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
|
|
1247
|
+
// the current DOCTYPE token.
|
|
1248
|
+
let nextPos = pos + 1;
|
|
1249
|
+
if (callbacks.doctype !== undefined) {
|
|
1250
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1251
|
+
}
|
|
1252
|
+
state = STATE_DATA;
|
|
1253
|
+
textStart = nextPos;
|
|
1254
|
+
pos = nextPos;
|
|
1255
|
+
} else {
|
|
1256
|
+
// Anything else
|
|
1257
|
+
// This is a missing-quote-before-doctype-public-identifier parse error. Set
|
|
1258
|
+
// the current DOCTYPE token's force-quirks flag to on. Reconsume in the
|
|
1259
|
+
// bogus DOCTYPE state.
|
|
1260
|
+
state = STATE_BOGUS_DOCTYPE;
|
|
1261
|
+
}
|
|
1262
|
+
break;
|
|
1263
|
+
|
|
1264
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-public-identifier-(double-quoted)-state
|
|
1265
|
+
case STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
|
|
1266
|
+
// Consume the next input character:
|
|
1267
|
+
if (cc === CC_QUOTATION_MARK) {
|
|
1268
|
+
// U+0022 QUOTATION MARK (")
|
|
1269
|
+
// Switch to the after DOCTYPE public identifier state.
|
|
1270
|
+
state = STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
|
|
1271
|
+
pos++;
|
|
1272
|
+
} else if (cc === 0x00) {
|
|
1273
|
+
// U+0000 NULL
|
|
1274
|
+
// This is an unexpected-null-character parse error. Append a U+FFFD
|
|
1275
|
+
// REPLACEMENT CHARACTER character to the current DOCTYPE token's public
|
|
1276
|
+
// identifier.
|
|
1277
|
+
pos++;
|
|
1278
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1279
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1280
|
+
// This is an abrupt-doctype-public-identifier parse error. Set the current
|
|
1281
|
+
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
|
|
1282
|
+
// the current DOCTYPE token.
|
|
1283
|
+
let nextPos = pos + 1;
|
|
1284
|
+
if (callbacks.doctype !== undefined) {
|
|
1285
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1286
|
+
}
|
|
1287
|
+
state = STATE_DATA;
|
|
1288
|
+
textStart = nextPos;
|
|
1289
|
+
pos = nextPos;
|
|
1290
|
+
} else {
|
|
1291
|
+
// Anything else
|
|
1292
|
+
// Append the current input character to the current DOCTYPE token's public
|
|
1293
|
+
// identifier.
|
|
1294
|
+
pos++;
|
|
1295
|
+
}
|
|
1296
|
+
break;
|
|
1297
|
+
|
|
1298
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-public-identifier-(single-quoted)-state
|
|
1299
|
+
case STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
|
|
1300
|
+
// Consume the next input character:
|
|
1301
|
+
if (cc === CC_APOSTROPHE) {
|
|
1302
|
+
// U+0027 APOSTROPHE (')
|
|
1303
|
+
// Switch to the after DOCTYPE public identifier state.
|
|
1304
|
+
state = STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
|
|
1305
|
+
pos++;
|
|
1306
|
+
} else if (cc === 0x00) {
|
|
1307
|
+
// U+0000 NULL
|
|
1308
|
+
// This is an unexpected-null-character parse error. Append a U+FFFD
|
|
1309
|
+
// REPLACEMENT CHARACTER character to the current DOCTYPE token's public
|
|
1310
|
+
// identifier.
|
|
1311
|
+
pos++;
|
|
1312
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1313
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1314
|
+
// This is an abrupt-doctype-public-identifier parse error. Set the current
|
|
1315
|
+
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
|
|
1316
|
+
// the current DOCTYPE token.
|
|
1317
|
+
let nextPos = pos + 1;
|
|
1318
|
+
if (callbacks.doctype !== undefined) {
|
|
1319
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1320
|
+
}
|
|
1321
|
+
state = STATE_DATA;
|
|
1322
|
+
textStart = nextPos;
|
|
1323
|
+
pos = nextPos;
|
|
1324
|
+
} else {
|
|
1325
|
+
// Anything else
|
|
1326
|
+
// Append the current input character to the current DOCTYPE token's public
|
|
1327
|
+
// identifier.
|
|
1328
|
+
pos++;
|
|
1329
|
+
}
|
|
1330
|
+
break;
|
|
1331
|
+
|
|
1332
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-public-identifier-state
|
|
1333
|
+
case STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
|
|
1334
|
+
// Consume the next input character:
|
|
1335
|
+
if (isSpace(cc)) {
|
|
1336
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1337
|
+
// U+000A LINE FEED (LF)
|
|
1338
|
+
// U+000C FORM FEED (FF)
|
|
1339
|
+
// U+0020 SPACE
|
|
1340
|
+
// Switch to the between DOCTYPE public and system identifiers state.
|
|
1341
|
+
state = STATE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;
|
|
1342
|
+
pos++;
|
|
1343
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1344
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1345
|
+
// Switch to the data state. Emit the current DOCTYPE token.
|
|
1346
|
+
let nextPos = pos + 1;
|
|
1347
|
+
if (callbacks.doctype !== undefined) {
|
|
1348
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1349
|
+
}
|
|
1350
|
+
state = STATE_DATA;
|
|
1351
|
+
textStart = nextPos;
|
|
1352
|
+
pos = nextPos;
|
|
1353
|
+
} else if (cc === CC_QUOTATION_MARK) {
|
|
1354
|
+
// U+0022 QUOTATION MARK (")
|
|
1355
|
+
// This is a missing-whitespace-between-doctype-public-and-system-identifiers
|
|
1356
|
+
// parse error. Set the current DOCTYPE token's system
|
|
1357
|
+
// identifier to the empty string (not missing), then switch
|
|
1358
|
+
// to the DOCTYPE system identifier (double-quoted) state.
|
|
1359
|
+
state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
|
|
1360
|
+
pos++;
|
|
1361
|
+
} else if (cc === CC_APOSTROPHE) {
|
|
1362
|
+
// U+0027 APOSTROPHE (')
|
|
1363
|
+
// This is a missing-whitespace-between-doctype-public-and-system-identifiers
|
|
1364
|
+
// parse error. Set the current DOCTYPE token's system
|
|
1365
|
+
// identifier to the empty string (not missing), then switch
|
|
1366
|
+
// to the DOCTYPE system identifier (single-quoted) state.
|
|
1367
|
+
state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
|
|
1368
|
+
pos++;
|
|
1369
|
+
} else {
|
|
1370
|
+
// Anything else
|
|
1371
|
+
// This is a missing-quote-before-doctype-system-identifier parse error. Set
|
|
1372
|
+
// the current DOCTYPE token's force-quirks flag to on. Reconsume in the
|
|
1373
|
+
// bogus DOCTYPE state.
|
|
1374
|
+
state = STATE_BOGUS_DOCTYPE;
|
|
1375
|
+
}
|
|
1376
|
+
break;
|
|
1377
|
+
|
|
1378
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#between-doctype-public-and-system-identifiers-state
|
|
1379
|
+
case STATE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
|
|
1380
|
+
// Consume the next input character:
|
|
1381
|
+
if (isSpace(cc)) {
|
|
1382
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1383
|
+
// U+000A LINE FEED (LF)
|
|
1384
|
+
// U+000C FORM FEED (FF)
|
|
1385
|
+
// U+0020 SPACE
|
|
1386
|
+
// Ignore the character.
|
|
1387
|
+
pos++;
|
|
1388
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1389
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1390
|
+
// Switch to the data state. Emit the current DOCTYPE token.
|
|
1391
|
+
let nextPos = pos + 1;
|
|
1392
|
+
if (callbacks.doctype !== undefined) {
|
|
1393
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1394
|
+
}
|
|
1395
|
+
state = STATE_DATA;
|
|
1396
|
+
textStart = nextPos;
|
|
1397
|
+
pos = nextPos;
|
|
1398
|
+
} else if (cc === CC_QUOTATION_MARK) {
|
|
1399
|
+
// U+0022 QUOTATION MARK (")
|
|
1400
|
+
// Set the current DOCTYPE token's system identifier to the empty string
|
|
1401
|
+
// (not missing), then switch to the DOCTYPE system identifier
|
|
1402
|
+
// (double-quoted) state.
|
|
1403
|
+
state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
|
|
1404
|
+
pos++;
|
|
1405
|
+
} else if (cc === CC_APOSTROPHE) {
|
|
1406
|
+
// U+0027 APOSTROPHE (')
|
|
1407
|
+
// Set the current DOCTYPE token's system identifier to the empty string
|
|
1408
|
+
// (not missing), then switch to the DOCTYPE system identifier
|
|
1409
|
+
// (single-quoted) state.
|
|
1410
|
+
state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
|
|
1411
|
+
pos++;
|
|
1412
|
+
} else {
|
|
1413
|
+
// Anything else
|
|
1414
|
+
// This is a missing-quote-before-doctype-system-identifier parse error. Set
|
|
1415
|
+
// the current DOCTYPE token's force-quirks flag to on. Reconsume in the
|
|
1416
|
+
// bogus DOCTYPE state.
|
|
1417
|
+
state = STATE_BOGUS_DOCTYPE;
|
|
1418
|
+
}
|
|
1419
|
+
break;
|
|
1420
|
+
|
|
1421
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-system-keyword-state
|
|
1422
|
+
case STATE_AFTER_DOCTYPE_SYSTEM_KEYWORD:
|
|
1423
|
+
// Consume the next input character:
|
|
1424
|
+
if (isSpace(cc)) {
|
|
1425
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1426
|
+
// U+000A LINE FEED (LF)
|
|
1427
|
+
// U+000C FORM FEED (FF)
|
|
1428
|
+
// U+0020 SPACE
|
|
1429
|
+
// Switch to the before DOCTYPE system identifier state.
|
|
1430
|
+
state = STATE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;
|
|
1431
|
+
pos++;
|
|
1432
|
+
} else if (cc === CC_QUOTATION_MARK) {
|
|
1433
|
+
// U+0022 QUOTATION MARK (")
|
|
1434
|
+
// This is a missing-whitespace-after-doctype-system-keyword parse error.
|
|
1435
|
+
// Set the current DOCTYPE token's system identifier to the empty string
|
|
1436
|
+
// (not missing), then switch to the DOCTYPE system identifier
|
|
1437
|
+
// (double-quoted) state.
|
|
1438
|
+
state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
|
|
1439
|
+
pos++;
|
|
1440
|
+
} else if (cc === CC_APOSTROPHE) {
|
|
1441
|
+
// U+0027 APOSTROPHE (')
|
|
1442
|
+
// This is a missing-whitespace-after-doctype-system-keyword parse error.
|
|
1443
|
+
// Set the current DOCTYPE token's system identifier to the empty string
|
|
1444
|
+
// (not missing), then switch to the DOCTYPE system identifier
|
|
1445
|
+
// (single-quoted) state.
|
|
1446
|
+
state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
|
|
1447
|
+
pos++;
|
|
1448
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1449
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1450
|
+
// This is a missing-doctype-system-identifier parse error. Set the current
|
|
1451
|
+
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
|
|
1452
|
+
// the current DOCTYPE token.
|
|
1453
|
+
let nextPos = pos + 1;
|
|
1454
|
+
if (callbacks.doctype !== undefined) {
|
|
1455
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1456
|
+
}
|
|
1457
|
+
state = STATE_DATA;
|
|
1458
|
+
textStart = nextPos;
|
|
1459
|
+
pos = nextPos;
|
|
1460
|
+
} else {
|
|
1461
|
+
// Anything else
|
|
1462
|
+
// This is a missing-quote-before-doctype-system-identifier parse error. Set
|
|
1463
|
+
// the current DOCTYPE token's force-quirks flag to on. Reconsume in the
|
|
1464
|
+
// bogus DOCTYPE state.
|
|
1465
|
+
state = STATE_BOGUS_DOCTYPE;
|
|
1466
|
+
}
|
|
1467
|
+
break;
|
|
1468
|
+
|
|
1469
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-system-identifier-state
|
|
1470
|
+
case STATE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
|
|
1471
|
+
// Consume the next input character:
|
|
1472
|
+
if (isSpace(cc)) {
|
|
1473
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1474
|
+
// U+000A LINE FEED (LF)
|
|
1475
|
+
// U+000C FORM FEED (FF)
|
|
1476
|
+
// U+0020 SPACE
|
|
1477
|
+
// Ignore the character.
|
|
1478
|
+
pos++;
|
|
1479
|
+
} else if (cc === CC_QUOTATION_MARK) {
|
|
1480
|
+
// U+0022 QUOTATION MARK (")
|
|
1481
|
+
// Set the current DOCTYPE token's system identifier to the empty string
|
|
1482
|
+
// (not missing), then switch to the DOCTYPE system identifier
|
|
1483
|
+
// (double-quoted) state.
|
|
1484
|
+
state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
|
|
1485
|
+
pos++;
|
|
1486
|
+
} else if (cc === CC_APOSTROPHE) {
|
|
1487
|
+
// U+0027 APOSTROPHE (')
|
|
1488
|
+
// Set the current DOCTYPE token's system identifier to the empty string
|
|
1489
|
+
// (not missing), then switch to the DOCTYPE system identifier
|
|
1490
|
+
// (single-quoted) state.
|
|
1491
|
+
state = STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
|
|
1492
|
+
pos++;
|
|
1493
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1494
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1495
|
+
// This is a missing-doctype-system-identifier parse error. Set the current
|
|
1496
|
+
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
|
|
1497
|
+
// the current DOCTYPE token.
|
|
1498
|
+
let nextPos = pos + 1;
|
|
1499
|
+
if (callbacks.doctype !== undefined) {
|
|
1500
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1501
|
+
}
|
|
1502
|
+
state = STATE_DATA;
|
|
1503
|
+
textStart = nextPos;
|
|
1504
|
+
pos = nextPos;
|
|
1505
|
+
} else {
|
|
1506
|
+
// Anything else
|
|
1507
|
+
// This is a missing-quote-before-doctype-system-identifier parse error. Set
|
|
1508
|
+
// the current DOCTYPE token's force-quirks flag to on. Reconsume in the
|
|
1509
|
+
// bogus DOCTYPE state.
|
|
1510
|
+
state = STATE_BOGUS_DOCTYPE;
|
|
1511
|
+
}
|
|
1512
|
+
break;
|
|
1513
|
+
|
|
1514
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-system-identifier-(double-quoted)-state
|
|
1515
|
+
case STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
|
|
1516
|
+
// Consume the next input character:
|
|
1517
|
+
if (cc === CC_QUOTATION_MARK) {
|
|
1518
|
+
// U+0022 QUOTATION MARK (")
|
|
1519
|
+
// Switch to the after DOCTYPE system identifier state.
|
|
1520
|
+
state = STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
|
|
1521
|
+
pos++;
|
|
1522
|
+
} else if (cc === 0x00) {
|
|
1523
|
+
// U+0000 NULL
|
|
1524
|
+
// This is an unexpected-null-character parse error. Append a U+FFFD
|
|
1525
|
+
// REPLACEMENT CHARACTER character to the current DOCTYPE token's system
|
|
1526
|
+
// identifier.
|
|
1527
|
+
pos++;
|
|
1528
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1529
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1530
|
+
// This is an abrupt-doctype-system-identifier parse error. Set the current
|
|
1531
|
+
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
|
|
1532
|
+
// the current DOCTYPE token.
|
|
1533
|
+
let nextPos = pos + 1;
|
|
1534
|
+
if (callbacks.doctype !== undefined) {
|
|
1535
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1536
|
+
}
|
|
1537
|
+
state = STATE_DATA;
|
|
1538
|
+
textStart = nextPos;
|
|
1539
|
+
pos = nextPos;
|
|
1540
|
+
} else {
|
|
1541
|
+
// Anything else
|
|
1542
|
+
// Append the current input character to the current DOCTYPE token's system
|
|
1543
|
+
// identifier.
|
|
1544
|
+
pos++;
|
|
1545
|
+
}
|
|
1546
|
+
break;
|
|
1547
|
+
|
|
1548
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-system-identifier-(single-quoted)-state
|
|
1549
|
+
case STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
|
|
1550
|
+
// Consume the next input character:
|
|
1551
|
+
if (cc === CC_APOSTROPHE) {
|
|
1552
|
+
// U+0027 APOSTROPHE (')
|
|
1553
|
+
// Switch to the after DOCTYPE system identifier state.
|
|
1554
|
+
state = STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
|
|
1555
|
+
pos++;
|
|
1556
|
+
} else if (cc === 0x00) {
|
|
1557
|
+
// U+0000 NULL
|
|
1558
|
+
// This is an unexpected-null-character parse error. Append a U+FFFD
|
|
1559
|
+
// REPLACEMENT CHARACTER character to the current DOCTYPE token's system
|
|
1560
|
+
// identifier.
|
|
1561
|
+
pos++;
|
|
1562
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1563
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1564
|
+
// This is an abrupt-doctype-system-identifier parse error. Set the current
|
|
1565
|
+
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
|
|
1566
|
+
// the current DOCTYPE token.
|
|
1567
|
+
let nextPos = pos + 1;
|
|
1568
|
+
if (callbacks.doctype !== undefined) {
|
|
1569
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1570
|
+
}
|
|
1571
|
+
state = STATE_DATA;
|
|
1572
|
+
textStart = nextPos;
|
|
1573
|
+
pos = nextPos;
|
|
1574
|
+
} else {
|
|
1575
|
+
// Anything else
|
|
1576
|
+
// Append the current input character to the current DOCTYPE token's system
|
|
1577
|
+
// identifier.
|
|
1578
|
+
pos++;
|
|
1579
|
+
}
|
|
1580
|
+
break;
|
|
1581
|
+
|
|
1582
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-system-identifier-state
|
|
1583
|
+
case STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
|
|
1584
|
+
// Consume the next input character:
|
|
1585
|
+
if (isSpace(cc)) {
|
|
1586
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1587
|
+
// U+000A LINE FEED (LF)
|
|
1588
|
+
// U+000C FORM FEED (FF)
|
|
1589
|
+
// U+0020 SPACE
|
|
1590
|
+
// Ignore the character.
|
|
1591
|
+
pos++;
|
|
1592
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1593
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1594
|
+
// Switch to the data state. Emit the current DOCTYPE token.
|
|
1595
|
+
let nextPos = pos + 1;
|
|
1596
|
+
if (callbacks.doctype !== undefined) {
|
|
1597
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1598
|
+
}
|
|
1599
|
+
state = STATE_DATA;
|
|
1600
|
+
textStart = nextPos;
|
|
1601
|
+
pos = nextPos;
|
|
1602
|
+
} else {
|
|
1603
|
+
// Anything else
|
|
1604
|
+
// This is an unexpected-character-after-doctype-system-identifier parse
|
|
1605
|
+
// error. Reconsume in the bogus DOCTYPE state. (This does not set the
|
|
1606
|
+
// current DOCTYPE token's force-quirks flag to on.)
|
|
1607
|
+
state = STATE_BOGUS_DOCTYPE;
|
|
1608
|
+
}
|
|
1609
|
+
break;
|
|
1610
|
+
|
|
1611
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#bogus-doctype-state
|
|
1612
|
+
case STATE_BOGUS_DOCTYPE:
|
|
1613
|
+
// Consume the next input character:
|
|
1614
|
+
if (cc === CC_GREATER_THAN) {
|
|
1615
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1616
|
+
// Switch to the data state. Emit the DOCTYPE token.
|
|
1617
|
+
let nextPos = pos + 1;
|
|
1618
|
+
if (callbacks.doctype !== undefined) {
|
|
1619
|
+
nextPos = callbacks.doctype(input, commentStart, pos + 1);
|
|
1620
|
+
}
|
|
1621
|
+
state = STATE_DATA;
|
|
1622
|
+
textStart = nextPos;
|
|
1623
|
+
pos = nextPos;
|
|
1624
|
+
} else if (cc === 0x00) {
|
|
1625
|
+
// U+0000 NULL
|
|
1626
|
+
// This is an unexpected-null-character parse error. Ignore the character.
|
|
1627
|
+
pos++;
|
|
1628
|
+
} else {
|
|
1629
|
+
// Anything else
|
|
1630
|
+
// Ignore the character.
|
|
1631
|
+
pos++;
|
|
1632
|
+
}
|
|
1633
|
+
break;
|
|
1634
|
+
|
|
1635
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-state
|
|
1636
|
+
case STATE_CDATA_SECTION:
|
|
1637
|
+
// Consume the next input character:
|
|
1638
|
+
// U+005D RIGHT SQUARE BRACKET (])
|
|
1639
|
+
// Switch to the CDATA section bracket state.
|
|
1640
|
+
if (cc === CC_RIGHT_SQUARE_BRACKET) {
|
|
1641
|
+
state = STATE_CDATA_SECTION_BRACKET;
|
|
1642
|
+
pos++;
|
|
1643
|
+
} else {
|
|
1644
|
+
// Anything else
|
|
1645
|
+
// Emit the current input character as a character token.
|
|
1646
|
+
pos++;
|
|
1647
|
+
}
|
|
1648
|
+
break;
|
|
1649
|
+
|
|
1650
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-bracket-state
|
|
1651
|
+
case STATE_CDATA_SECTION_BRACKET:
|
|
1652
|
+
// Consume the next input character:
|
|
1653
|
+
// U+005D RIGHT SQUARE BRACKET (])
|
|
1654
|
+
// Switch to the CDATA section end state.
|
|
1655
|
+
if (cc === CC_RIGHT_SQUARE_BRACKET) {
|
|
1656
|
+
state = STATE_CDATA_SECTION_END;
|
|
1657
|
+
pos++;
|
|
1658
|
+
} else {
|
|
1659
|
+
// Anything else
|
|
1660
|
+
// Emit a U+005D RIGHT SQUARE BRACKET character token. Reconsume in the
|
|
1661
|
+
// CDATA section state.
|
|
1662
|
+
state = STATE_CDATA_SECTION;
|
|
1663
|
+
// Reconsume
|
|
1664
|
+
}
|
|
1665
|
+
break;
|
|
1666
|
+
|
|
1667
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-end-state
|
|
1668
|
+
case STATE_CDATA_SECTION_END:
|
|
1669
|
+
// Consume the next input character:
|
|
1670
|
+
// U+005D RIGHT SQUARE BRACKET (])
|
|
1671
|
+
// Emit a U+005D RIGHT SQUARE BRACKET character token.
|
|
1672
|
+
if (cc === CC_RIGHT_SQUARE_BRACKET) {
|
|
1673
|
+
pos++;
|
|
1674
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1675
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1676
|
+
// Switch to the data state.
|
|
1677
|
+
let nextPos = pos + 1;
|
|
1678
|
+
if (callbacks.comment !== undefined) {
|
|
1679
|
+
nextPos = callbacks.comment(input, commentStart, pos + 1);
|
|
1680
|
+
}
|
|
1681
|
+
state = STATE_DATA;
|
|
1682
|
+
textStart = nextPos;
|
|
1683
|
+
pos = nextPos;
|
|
1684
|
+
} else {
|
|
1685
|
+
// Anything else
|
|
1686
|
+
// Emit two U+005D RIGHT SQUARE BRACKET character tokens. Reconsume in the
|
|
1687
|
+
// CDATA section state.
|
|
1688
|
+
state = STATE_CDATA_SECTION;
|
|
1689
|
+
// Reconsume
|
|
1690
|
+
}
|
|
1691
|
+
break;
|
|
1692
|
+
|
|
1693
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state
|
|
1694
|
+
case STATE_RCDATA:
|
|
1695
|
+
// Consume the next input character:
|
|
1696
|
+
// U+003C LESS-THAN SIGN (<)
|
|
1697
|
+
// Switch to the RCDATA less-than sign state.
|
|
1698
|
+
if (cc === CC_LESS_THAN) {
|
|
1699
|
+
tagStart = pos;
|
|
1700
|
+
state = STATE_RCDATA_LESS_THAN_SIGN;
|
|
1701
|
+
pos++;
|
|
1702
|
+
} else {
|
|
1703
|
+
// Anything else
|
|
1704
|
+
// Emit the current input character as a character token.
|
|
1705
|
+
pos++;
|
|
1706
|
+
}
|
|
1707
|
+
break;
|
|
1708
|
+
|
|
1709
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-less-than-sign-state
|
|
1710
|
+
case STATE_RCDATA_LESS_THAN_SIGN:
|
|
1711
|
+
// Consume the next input character:
|
|
1712
|
+
// U+002F SOLIDUS (/)
|
|
1713
|
+
// Set the temporary buffer to the empty string. Switch to the RCDATA end
|
|
1714
|
+
// tag open state.
|
|
1715
|
+
if (cc === CC_SOLIDUS) {
|
|
1716
|
+
tempBuffer = "";
|
|
1717
|
+
state = STATE_RCDATA_END_TAG_OPEN;
|
|
1718
|
+
pos++;
|
|
1719
|
+
} else {
|
|
1720
|
+
// Anything else
|
|
1721
|
+
// Emit a U+003C LESS-THAN SIGN character token. Reconsume in the RCDATA
|
|
1722
|
+
// state.
|
|
1723
|
+
state = STATE_RCDATA;
|
|
1724
|
+
// Reconsume
|
|
1725
|
+
}
|
|
1726
|
+
break;
|
|
1727
|
+
|
|
1728
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-end-tag-open-state
|
|
1729
|
+
case STATE_RCDATA_END_TAG_OPEN:
|
|
1730
|
+
// Consume the next input character:
|
|
1731
|
+
// ASCII alpha
|
|
1732
|
+
// Create a new end tag token, set its tag name to the empty string.
|
|
1733
|
+
// Reconsume in the RCDATA end tag name state.
|
|
1734
|
+
if (isAsciiAlpha(cc)) {
|
|
1735
|
+
tagNameStart = pos;
|
|
1736
|
+
state = STATE_RCDATA_END_TAG_NAME;
|
|
1737
|
+
// Reconsume
|
|
1738
|
+
} else {
|
|
1739
|
+
// Anything else
|
|
1740
|
+
// Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS
|
|
1741
|
+
// character token. Reconsume in the RCDATA state.
|
|
1742
|
+
state = STATE_RCDATA;
|
|
1743
|
+
// Reconsume
|
|
1744
|
+
}
|
|
1745
|
+
break;
|
|
1746
|
+
|
|
1747
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-end-tag-name-state
|
|
1748
|
+
case STATE_RCDATA_END_TAG_NAME:
|
|
1749
|
+
// Consume the next input character:
|
|
1750
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1751
|
+
// U+000A LINE FEED (LF)
|
|
1752
|
+
// U+000C FORM FEED (FF)
|
|
1753
|
+
// U+0020 SPACE
|
|
1754
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
1755
|
+
// to the before attribute name state. Otherwise, treat it as per the
|
|
1756
|
+
// "anything else" entry below.
|
|
1757
|
+
if (isSpace(cc)) {
|
|
1758
|
+
tagNameEnd = pos;
|
|
1759
|
+
if (
|
|
1760
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
1761
|
+
lastOpenTagName
|
|
1762
|
+
) {
|
|
1763
|
+
state = STATE_BEFORE_ATTRIBUTE_NAME;
|
|
1764
|
+
pos++;
|
|
1765
|
+
} else {
|
|
1766
|
+
state = STATE_RCDATA;
|
|
1767
|
+
// Reconsume
|
|
1768
|
+
}
|
|
1769
|
+
} else if (cc === CC_SOLIDUS) {
|
|
1770
|
+
// U+002F SOLIDUS (/)
|
|
1771
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
1772
|
+
// to the self-closing start tag state. Otherwise, treat it as per the
|
|
1773
|
+
// "anything else" entry below.
|
|
1774
|
+
tagNameEnd = pos;
|
|
1775
|
+
if (
|
|
1776
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
1777
|
+
lastOpenTagName
|
|
1778
|
+
) {
|
|
1779
|
+
state = STATE_SELF_CLOSING_START_TAG;
|
|
1780
|
+
pos++;
|
|
1781
|
+
} else {
|
|
1782
|
+
state = STATE_RCDATA;
|
|
1783
|
+
// Reconsume
|
|
1784
|
+
}
|
|
1785
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1786
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1787
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
1788
|
+
// to the data state and emit the current tag token. Otherwise, treat it as
|
|
1789
|
+
// per the "anything else" entry below.
|
|
1790
|
+
tagNameEnd = pos;
|
|
1791
|
+
if (
|
|
1792
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
1793
|
+
lastOpenTagName
|
|
1794
|
+
) {
|
|
1795
|
+
flushText(tagStart);
|
|
1796
|
+
state = STATE_DATA;
|
|
1797
|
+
pos = emitCloseTag(pos + 1);
|
|
1798
|
+
} else {
|
|
1799
|
+
state = STATE_RCDATA;
|
|
1800
|
+
// Reconsume
|
|
1801
|
+
}
|
|
1802
|
+
} else if (isAsciiAlpha(cc)) {
|
|
1803
|
+
// ASCII upper alpha / ASCII lower alpha
|
|
1804
|
+
// Append the lowercase version of the current input character to the
|
|
1805
|
+
// current tag token's tag name. Append the current input character to
|
|
1806
|
+
// the temporary buffer.
|
|
1807
|
+
pos++;
|
|
1808
|
+
} else {
|
|
1809
|
+
// Anything else
|
|
1810
|
+
// Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
|
|
1811
|
+
// token, and a character token for each of the characters in the temporary
|
|
1812
|
+
// buffer (in the order they were added to the buffer). Reconsume in the
|
|
1813
|
+
// RCDATA state.
|
|
1814
|
+
state = STATE_RCDATA;
|
|
1815
|
+
// Reconsume
|
|
1816
|
+
}
|
|
1817
|
+
break;
|
|
1818
|
+
|
|
1819
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-state
|
|
1820
|
+
case STATE_RAWTEXT:
|
|
1821
|
+
// Consume the next input character:
|
|
1822
|
+
// U+003C LESS-THAN SIGN (<)
|
|
1823
|
+
// Switch to the RAWTEXT less-than sign state.
|
|
1824
|
+
if (cc === CC_LESS_THAN) {
|
|
1825
|
+
tagStart = pos;
|
|
1826
|
+
state = STATE_RAWTEXT_LESS_THAN_SIGN;
|
|
1827
|
+
pos++;
|
|
1828
|
+
} else {
|
|
1829
|
+
pos++;
|
|
1830
|
+
}
|
|
1831
|
+
break;
|
|
1832
|
+
|
|
1833
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-less-than-sign-state
|
|
1834
|
+
case STATE_RAWTEXT_LESS_THAN_SIGN:
|
|
1835
|
+
// Consume the next input character:
|
|
1836
|
+
// U+002F SOLIDUS (/)
|
|
1837
|
+
// Set the temporary buffer to the empty string. Switch to the RAWTEXT end
|
|
1838
|
+
// tag open state.
|
|
1839
|
+
if (cc === CC_SOLIDUS) {
|
|
1840
|
+
tempBuffer = "";
|
|
1841
|
+
state = STATE_RAWTEXT_END_TAG_OPEN;
|
|
1842
|
+
pos++;
|
|
1843
|
+
} else {
|
|
1844
|
+
// Anything else
|
|
1845
|
+
// Emit a U+003C LESS-THAN SIGN character token. Reconsume in the RAWTEXT
|
|
1846
|
+
// state.
|
|
1847
|
+
state = STATE_RAWTEXT;
|
|
1848
|
+
// Reconsume
|
|
1849
|
+
}
|
|
1850
|
+
break;
|
|
1851
|
+
|
|
1852
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-end-tag-open-state
|
|
1853
|
+
case STATE_RAWTEXT_END_TAG_OPEN:
|
|
1854
|
+
// Consume the next input character:
|
|
1855
|
+
// ASCII alpha
|
|
1856
|
+
// Create a new end tag token, set its tag name to the empty string.
|
|
1857
|
+
// Reconsume in the RAWTEXT end tag name state.
|
|
1858
|
+
if (isAsciiAlpha(cc)) {
|
|
1859
|
+
tagNameStart = pos;
|
|
1860
|
+
state = STATE_RAWTEXT_END_TAG_NAME;
|
|
1861
|
+
// Reconsume
|
|
1862
|
+
} else {
|
|
1863
|
+
// Anything else
|
|
1864
|
+
// Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS
|
|
1865
|
+
// character token. Reconsume in the RAWTEXT state.
|
|
1866
|
+
state = STATE_RAWTEXT;
|
|
1867
|
+
// Reconsume
|
|
1868
|
+
}
|
|
1869
|
+
break;
|
|
1870
|
+
|
|
1871
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-end-tag-name-state
|
|
1872
|
+
case STATE_RAWTEXT_END_TAG_NAME:
|
|
1873
|
+
// Consume the next input character:
|
|
1874
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
1875
|
+
// U+000A LINE FEED (LF)
|
|
1876
|
+
// U+000C FORM FEED (FF)
|
|
1877
|
+
// U+0020 SPACE
|
|
1878
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
1879
|
+
// to the before attribute name state. Otherwise, treat it as per the
|
|
1880
|
+
// "anything else" entry below.
|
|
1881
|
+
if (isSpace(cc)) {
|
|
1882
|
+
tagNameEnd = pos;
|
|
1883
|
+
if (
|
|
1884
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
1885
|
+
lastOpenTagName
|
|
1886
|
+
) {
|
|
1887
|
+
state = STATE_BEFORE_ATTRIBUTE_NAME;
|
|
1888
|
+
pos++;
|
|
1889
|
+
} else {
|
|
1890
|
+
state = STATE_RAWTEXT;
|
|
1891
|
+
}
|
|
1892
|
+
} else if (cc === CC_SOLIDUS) {
|
|
1893
|
+
// U+002F SOLIDUS (/)
|
|
1894
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
1895
|
+
// to the self-closing start tag state. Otherwise, treat it as per the
|
|
1896
|
+
// "anything else" entry below.
|
|
1897
|
+
tagNameEnd = pos;
|
|
1898
|
+
if (
|
|
1899
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
1900
|
+
lastOpenTagName
|
|
1901
|
+
) {
|
|
1902
|
+
state = STATE_SELF_CLOSING_START_TAG;
|
|
1903
|
+
pos++;
|
|
1904
|
+
} else {
|
|
1905
|
+
state = STATE_RAWTEXT;
|
|
1906
|
+
}
|
|
1907
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
1908
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
1909
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
1910
|
+
// to the data state and emit the current tag token. Otherwise, treat it as
|
|
1911
|
+
// per the "anything else" entry below.
|
|
1912
|
+
tagNameEnd = pos;
|
|
1913
|
+
if (
|
|
1914
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
1915
|
+
lastOpenTagName
|
|
1916
|
+
) {
|
|
1917
|
+
flushText(tagStart);
|
|
1918
|
+
state = STATE_DATA;
|
|
1919
|
+
pos = emitCloseTag(pos + 1);
|
|
1920
|
+
} else {
|
|
1921
|
+
state = STATE_RAWTEXT;
|
|
1922
|
+
}
|
|
1923
|
+
} else if (isAsciiAlpha(cc)) {
|
|
1924
|
+
// ASCII upper alpha / ASCII lower alpha
|
|
1925
|
+
// Append the lowercase version of the current input character to the
|
|
1926
|
+
// current tag token's tag name. Append the current input character to
|
|
1927
|
+
// the temporary buffer.
|
|
1928
|
+
pos++;
|
|
1929
|
+
} else {
|
|
1930
|
+
// Anything else
|
|
1931
|
+
// Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
|
|
1932
|
+
// token, and a character token for each of the characters in the temporary
|
|
1933
|
+
// buffer (in the order they were added to the buffer). Reconsume in the
|
|
1934
|
+
// RAWTEXT state.
|
|
1935
|
+
state = STATE_RAWTEXT;
|
|
1936
|
+
// Reconsume
|
|
1937
|
+
}
|
|
1938
|
+
break;
|
|
1939
|
+
|
|
1940
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-state
|
|
1941
|
+
case STATE_SCRIPT_DATA:
|
|
1942
|
+
// Consume the next input character:
|
|
1943
|
+
// U+003C LESS-THAN SIGN (<)
|
|
1944
|
+
// Switch to the script data less-than sign state.
|
|
1945
|
+
if (cc === CC_LESS_THAN) {
|
|
1946
|
+
tagStart = pos;
|
|
1947
|
+
state = STATE_SCRIPT_DATA_LESS_THAN_SIGN;
|
|
1948
|
+
pos++;
|
|
1949
|
+
} else {
|
|
1950
|
+
pos++;
|
|
1951
|
+
}
|
|
1952
|
+
break;
|
|
1953
|
+
|
|
1954
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-less-than-sign-state
|
|
1955
|
+
case STATE_SCRIPT_DATA_LESS_THAN_SIGN:
|
|
1956
|
+
// Consume the next input character:
|
|
1957
|
+
// U+002F SOLIDUS (/)
|
|
1958
|
+
// Set the temporary buffer to the empty string. Switch to the script data
|
|
1959
|
+
// end tag open state.
|
|
1960
|
+
if (cc === CC_SOLIDUS) {
|
|
1961
|
+
tempBuffer = "";
|
|
1962
|
+
state = STATE_SCRIPT_DATA_END_TAG_OPEN;
|
|
1963
|
+
pos++;
|
|
1964
|
+
} else if (cc === CC_EXCLAMATION_MARK) {
|
|
1965
|
+
// U+0021 EXCLAMATION MARK (!)
|
|
1966
|
+
// Switch to the script data escape start state. Emit a U+003C LESS-THAN
|
|
1967
|
+
// SIGN character token and a U+0021 EXCLAMATION MARK character token.
|
|
1968
|
+
state = STATE_SCRIPT_DATA_ESCAPE_START;
|
|
1969
|
+
pos++;
|
|
1970
|
+
} else {
|
|
1971
|
+
// Anything else
|
|
1972
|
+
// Emit a U+003C LESS-THAN SIGN character token. Reconsume in the script
|
|
1973
|
+
// data state.
|
|
1974
|
+
state = STATE_SCRIPT_DATA;
|
|
1975
|
+
// Reconsume
|
|
1976
|
+
}
|
|
1977
|
+
break;
|
|
1978
|
+
|
|
1979
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-end-tag-open-state
|
|
1980
|
+
case STATE_SCRIPT_DATA_END_TAG_OPEN:
|
|
1981
|
+
// Consume the next input character:
|
|
1982
|
+
// ASCII alpha
|
|
1983
|
+
// Create a new end tag token, set its tag name to the empty string.
|
|
1984
|
+
// Reconsume in the script data end tag name state.
|
|
1985
|
+
if (isAsciiAlpha(cc)) {
|
|
1986
|
+
tagNameStart = pos;
|
|
1987
|
+
state = STATE_SCRIPT_DATA_END_TAG_NAME;
|
|
1988
|
+
// Reconsume
|
|
1989
|
+
} else {
|
|
1990
|
+
// Anything else
|
|
1991
|
+
// Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS
|
|
1992
|
+
// character token. Reconsume in the script data state.
|
|
1993
|
+
state = STATE_SCRIPT_DATA;
|
|
1994
|
+
// Reconsume
|
|
1995
|
+
}
|
|
1996
|
+
break;
|
|
1997
|
+
|
|
1998
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-end-tag-name-state
|
|
1999
|
+
case STATE_SCRIPT_DATA_END_TAG_NAME:
|
|
2000
|
+
// Consume the next input character:
|
|
2001
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
2002
|
+
// U+000A LINE FEED (LF)
|
|
2003
|
+
// U+000C FORM FEED (FF)
|
|
2004
|
+
// U+0020 SPACE
|
|
2005
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
2006
|
+
// to the before attribute name state. Otherwise, treat it as per the
|
|
2007
|
+
// "anything else" entry below.
|
|
2008
|
+
if (isSpace(cc)) {
|
|
2009
|
+
tagNameEnd = pos;
|
|
2010
|
+
if (
|
|
2011
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
2012
|
+
lastOpenTagName
|
|
2013
|
+
) {
|
|
2014
|
+
state = STATE_BEFORE_ATTRIBUTE_NAME;
|
|
2015
|
+
pos++;
|
|
2016
|
+
} else {
|
|
2017
|
+
state = STATE_SCRIPT_DATA;
|
|
2018
|
+
}
|
|
2019
|
+
} else if (cc === CC_SOLIDUS) {
|
|
2020
|
+
// U+002F SOLIDUS (/)
|
|
2021
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
2022
|
+
// to the self-closing start tag state. Otherwise, treat it as per the
|
|
2023
|
+
// "anything else" entry below.
|
|
2024
|
+
tagNameEnd = pos;
|
|
2025
|
+
if (
|
|
2026
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
2027
|
+
lastOpenTagName
|
|
2028
|
+
) {
|
|
2029
|
+
state = STATE_SELF_CLOSING_START_TAG;
|
|
2030
|
+
pos++;
|
|
2031
|
+
} else {
|
|
2032
|
+
state = STATE_SCRIPT_DATA;
|
|
2033
|
+
}
|
|
2034
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
2035
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
2036
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
2037
|
+
// to the data state and emit the current tag token. Otherwise, treat it as
|
|
2038
|
+
// per the "anything else" entry below.
|
|
2039
|
+
tagNameEnd = pos;
|
|
2040
|
+
if (
|
|
2041
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
2042
|
+
lastOpenTagName
|
|
2043
|
+
) {
|
|
2044
|
+
flushText(tagStart);
|
|
2045
|
+
state = STATE_DATA;
|
|
2046
|
+
pos = emitCloseTag(pos + 1);
|
|
2047
|
+
} else {
|
|
2048
|
+
state = STATE_SCRIPT_DATA;
|
|
2049
|
+
}
|
|
2050
|
+
} else if (isAsciiAlpha(cc)) {
|
|
2051
|
+
// ASCII upper alpha / ASCII lower alpha
|
|
2052
|
+
pos++;
|
|
2053
|
+
} else {
|
|
2054
|
+
// Anything else
|
|
2055
|
+
// Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
|
|
2056
|
+
// token, and a character token for each of the characters in the temporary
|
|
2057
|
+
// buffer (in the order they were added to the buffer). Reconsume in the
|
|
2058
|
+
// script data state.
|
|
2059
|
+
state = STATE_SCRIPT_DATA;
|
|
2060
|
+
// Reconsume
|
|
2061
|
+
}
|
|
2062
|
+
break;
|
|
2063
|
+
|
|
2064
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escape-start-state
|
|
2065
|
+
case STATE_SCRIPT_DATA_ESCAPE_START:
|
|
2066
|
+
// Consume the next input character:
|
|
2067
|
+
// U+002D HYPHEN-MINUS (-)
|
|
2068
|
+
// Switch to the script data escape start dash state. Emit a U+002D
|
|
2069
|
+
// HYPHEN-MINUS character token.
|
|
2070
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
2071
|
+
state = STATE_SCRIPT_DATA_ESCAPE_START_DASH;
|
|
2072
|
+
pos++;
|
|
2073
|
+
} else {
|
|
2074
|
+
// Anything else
|
|
2075
|
+
// Reconsume in the script data state.
|
|
2076
|
+
state = STATE_SCRIPT_DATA;
|
|
2077
|
+
// Reconsume
|
|
2078
|
+
}
|
|
2079
|
+
break;
|
|
2080
|
+
|
|
2081
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escape-start-dash-state
|
|
2082
|
+
case STATE_SCRIPT_DATA_ESCAPE_START_DASH:
|
|
2083
|
+
// Consume the next input character:
|
|
2084
|
+
// U+002D HYPHEN-MINUS (-)
|
|
2085
|
+
// Switch to the script data escaped dash dash state. Emit a U+002D
|
|
2086
|
+
// HYPHEN-MINUS character token.
|
|
2087
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
2088
|
+
state = STATE_SCRIPT_DATA_ESCAPED_DASH_DASH;
|
|
2089
|
+
pos++;
|
|
2090
|
+
} else {
|
|
2091
|
+
// Anything else
|
|
2092
|
+
// Reconsume in the script data state.
|
|
2093
|
+
state = STATE_SCRIPT_DATA;
|
|
2094
|
+
// Reconsume
|
|
2095
|
+
}
|
|
2096
|
+
break;
|
|
2097
|
+
|
|
2098
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-state
|
|
2099
|
+
case STATE_SCRIPT_DATA_ESCAPED:
|
|
2100
|
+
// Consume the next input character:
|
|
2101
|
+
// U+002D HYPHEN-MINUS (-)
|
|
2102
|
+
// Switch to the script data escaped dash state. Emit a U+002D HYPHEN-MINUS
|
|
2103
|
+
// character token.
|
|
2104
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
2105
|
+
state = STATE_SCRIPT_DATA_ESCAPED_DASH;
|
|
2106
|
+
pos++;
|
|
2107
|
+
} else if (cc === CC_LESS_THAN) {
|
|
2108
|
+
// U+003C LESS-THAN SIGN (<)
|
|
2109
|
+
// Switch to the script data escaped less-than sign state.
|
|
2110
|
+
state = STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
|
|
2111
|
+
pos++;
|
|
2112
|
+
} else {
|
|
2113
|
+
// Anything else
|
|
2114
|
+
// Emit the current input character as a character token.
|
|
2115
|
+
pos++;
|
|
2116
|
+
}
|
|
2117
|
+
break;
|
|
2118
|
+
|
|
2119
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-dash-state
|
|
2120
|
+
case STATE_SCRIPT_DATA_ESCAPED_DASH:
|
|
2121
|
+
// Consume the next input character:
|
|
2122
|
+
// U+002D HYPHEN-MINUS (-)
|
|
2123
|
+
// Switch to the script data escaped dash dash state. Emit a U+002D
|
|
2124
|
+
// HYPHEN-MINUS character token.
|
|
2125
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
2126
|
+
state = STATE_SCRIPT_DATA_ESCAPED_DASH_DASH;
|
|
2127
|
+
pos++;
|
|
2128
|
+
} else if (cc === CC_LESS_THAN) {
|
|
2129
|
+
// U+003C LESS-THAN SIGN (<)
|
|
2130
|
+
// Switch to the script data escaped less-than sign state.
|
|
2131
|
+
state = STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
|
|
2132
|
+
pos++;
|
|
2133
|
+
} else {
|
|
2134
|
+
// Anything else
|
|
2135
|
+
// Switch to the script data escaped state. Emit the current input character
|
|
2136
|
+
// as a character token.
|
|
2137
|
+
state = STATE_SCRIPT_DATA_ESCAPED;
|
|
2138
|
+
pos++;
|
|
2139
|
+
}
|
|
2140
|
+
break;
|
|
2141
|
+
|
|
2142
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-dash-dash-state
|
|
2143
|
+
case STATE_SCRIPT_DATA_ESCAPED_DASH_DASH:
|
|
2144
|
+
// Consume the next input character:
|
|
2145
|
+
// U+002D HYPHEN-MINUS (-)
|
|
2146
|
+
// Emit a U+002D HYPHEN-MINUS character token.
|
|
2147
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
2148
|
+
pos++;
|
|
2149
|
+
} else if (cc === CC_LESS_THAN) {
|
|
2150
|
+
// U+003C LESS-THAN SIGN (<)
|
|
2151
|
+
// Switch to the script data escaped less-than sign state.
|
|
2152
|
+
state = STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
|
|
2153
|
+
pos++;
|
|
2154
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
2155
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
2156
|
+
// Switch to the script data state. Emit a U+003E GREATER-THAN SIGN
|
|
2157
|
+
// character token.
|
|
2158
|
+
state = STATE_SCRIPT_DATA;
|
|
2159
|
+
pos++;
|
|
2160
|
+
} else {
|
|
2161
|
+
// Anything else
|
|
2162
|
+
// Switch to the script data escaped state. Emit the current input character
|
|
2163
|
+
// as a character token.
|
|
2164
|
+
state = STATE_SCRIPT_DATA_ESCAPED;
|
|
2165
|
+
pos++;
|
|
2166
|
+
}
|
|
2167
|
+
break;
|
|
2168
|
+
|
|
2169
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-less-than-sign-state
|
|
2170
|
+
case STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
|
|
2171
|
+
// Consume the next input character:
|
|
2172
|
+
// U+002F SOLIDUS (/)
|
|
2173
|
+
// Set the temporary buffer to the empty string. Switch to the script data
|
|
2174
|
+
// escaped end tag open state.
|
|
2175
|
+
if (cc === CC_SOLIDUS) {
|
|
2176
|
+
tempBuffer = "";
|
|
2177
|
+
state = STATE_SCRIPT_DATA_ESCAPED_END_TAG_OPEN;
|
|
2178
|
+
pos++;
|
|
2179
|
+
} else if (isAsciiAlpha(cc)) {
|
|
2180
|
+
// ASCII alpha
|
|
2181
|
+
// Set the temporary buffer to the empty string. Emit a U+003C LESS-THAN
|
|
2182
|
+
// SIGN character token. Reconsume in the script data double escape start
|
|
2183
|
+
// state.
|
|
2184
|
+
tempBuffer = "";
|
|
2185
|
+
state = STATE_SCRIPT_DATA_DOUBLE_ESCAPE_START;
|
|
2186
|
+
// Reconsume
|
|
2187
|
+
} else {
|
|
2188
|
+
// Anything else
|
|
2189
|
+
// Emit a U+003C LESS-THAN SIGN character token. Reconsume in the script
|
|
2190
|
+
// data escaped state.
|
|
2191
|
+
state = STATE_SCRIPT_DATA_ESCAPED;
|
|
2192
|
+
// Reconsume
|
|
2193
|
+
}
|
|
2194
|
+
break;
|
|
2195
|
+
|
|
2196
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-end-tag-open-state
|
|
2197
|
+
case STATE_SCRIPT_DATA_ESCAPED_END_TAG_OPEN:
|
|
2198
|
+
// Consume the next input character:
|
|
2199
|
+
// ASCII alpha
|
|
2200
|
+
// Create a new end tag token, set its tag name to the empty string.
|
|
2201
|
+
// Reconsume in the script data escaped end tag name state.
|
|
2202
|
+
if (isAsciiAlpha(cc)) {
|
|
2203
|
+
tagNameStart = pos;
|
|
2204
|
+
state = STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME;
|
|
2205
|
+
// Reconsume
|
|
2206
|
+
} else {
|
|
2207
|
+
// Anything else
|
|
2208
|
+
// Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS
|
|
2209
|
+
// character token. Reconsume in the script data escaped state.
|
|
2210
|
+
state = STATE_SCRIPT_DATA_ESCAPED;
|
|
2211
|
+
// Reconsume
|
|
2212
|
+
}
|
|
2213
|
+
break;
|
|
2214
|
+
|
|
2215
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-end-tag-name-state
|
|
2216
|
+
case STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME:
|
|
2217
|
+
// Consume the next input character:
|
|
2218
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
2219
|
+
// U+000A LINE FEED (LF)
|
|
2220
|
+
// U+000C FORM FEED (FF)
|
|
2221
|
+
// U+0020 SPACE
|
|
2222
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
2223
|
+
// to the before attribute name state. Otherwise, treat it as per the
|
|
2224
|
+
// "anything else" entry below.
|
|
2225
|
+
if (isSpace(cc)) {
|
|
2226
|
+
tagNameEnd = pos;
|
|
2227
|
+
if (
|
|
2228
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
2229
|
+
lastOpenTagName
|
|
2230
|
+
) {
|
|
2231
|
+
state = STATE_BEFORE_ATTRIBUTE_NAME;
|
|
2232
|
+
pos++;
|
|
2233
|
+
} else {
|
|
2234
|
+
state = STATE_SCRIPT_DATA_ESCAPED;
|
|
2235
|
+
}
|
|
2236
|
+
} else if (cc === CC_SOLIDUS) {
|
|
2237
|
+
// U+002F SOLIDUS (/)
|
|
2238
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
2239
|
+
// to the self-closing start tag state. Otherwise, treat it as per the
|
|
2240
|
+
// "anything else" entry below.
|
|
2241
|
+
tagNameEnd = pos;
|
|
2242
|
+
if (
|
|
2243
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
2244
|
+
lastOpenTagName
|
|
2245
|
+
) {
|
|
2246
|
+
state = STATE_SELF_CLOSING_START_TAG;
|
|
2247
|
+
pos++;
|
|
2248
|
+
} else {
|
|
2249
|
+
state = STATE_SCRIPT_DATA_ESCAPED;
|
|
2250
|
+
}
|
|
2251
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
2252
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
2253
|
+
// If the current end tag token is an appropriate end tag token, then switch
|
|
2254
|
+
// to the data state and emit the current tag token. Otherwise, treat it as
|
|
2255
|
+
// per the "anything else" entry below.
|
|
2256
|
+
tagNameEnd = pos;
|
|
2257
|
+
if (
|
|
2258
|
+
input.slice(tagNameStart, tagNameEnd).toLowerCase() ===
|
|
2259
|
+
lastOpenTagName
|
|
2260
|
+
) {
|
|
2261
|
+
flushText(tagStart);
|
|
2262
|
+
state = STATE_DATA;
|
|
2263
|
+
pos = emitCloseTag(pos + 1);
|
|
2264
|
+
} else {
|
|
2265
|
+
state = STATE_SCRIPT_DATA_ESCAPED;
|
|
2266
|
+
}
|
|
2267
|
+
} else if (isAsciiAlpha(cc)) {
|
|
2268
|
+
// ASCII upper alpha / ASCII lower alpha
|
|
2269
|
+
pos++;
|
|
2270
|
+
} else {
|
|
2271
|
+
// Anything else
|
|
2272
|
+
// Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
|
|
2273
|
+
// token, and a character token for each of the characters in the temporary
|
|
2274
|
+
// buffer (in the order they were added to the buffer). Reconsume in the
|
|
2275
|
+
// script data escaped state.
|
|
2276
|
+
state = STATE_SCRIPT_DATA_ESCAPED;
|
|
2277
|
+
// Reconsume
|
|
2278
|
+
}
|
|
2279
|
+
break;
|
|
2280
|
+
|
|
2281
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-start-state
|
|
2282
|
+
case STATE_SCRIPT_DATA_DOUBLE_ESCAPE_START:
|
|
2283
|
+
// Consume the next input character:
|
|
2284
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
2285
|
+
// U+000A LINE FEED (LF)
|
|
2286
|
+
// U+000C FORM FEED (FF)
|
|
2287
|
+
// U+0020 SPACE
|
|
2288
|
+
// U+002F SOLIDUS (/)
|
|
2289
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
2290
|
+
// If the temporary buffer is the string "script", then switch to the script
|
|
2291
|
+
// data double escaped state. Otherwise, switch to the script data escaped
|
|
2292
|
+
// state. Emit the current input character as a character token.
|
|
2293
|
+
if (isSpace(cc) || cc === CC_SOLIDUS || cc === CC_GREATER_THAN) {
|
|
2294
|
+
state =
|
|
2295
|
+
tempBuffer === "script"
|
|
2296
|
+
? STATE_SCRIPT_DATA_DOUBLE_ESCAPED
|
|
2297
|
+
: STATE_SCRIPT_DATA_ESCAPED;
|
|
2298
|
+
pos++;
|
|
2299
|
+
} else if (isAsciiUpperAlpha(cc)) {
|
|
2300
|
+
// ASCII upper alpha
|
|
2301
|
+
// Append the lowercase version of the current input character (add 0x0020
|
|
2302
|
+
// to the character's code point) to the temporary buffer. Emit the current
|
|
2303
|
+
// input character as a character token.
|
|
2304
|
+
tempBuffer += String.fromCharCode(cc + 0x20);
|
|
2305
|
+
pos++;
|
|
2306
|
+
} else if (isAsciiLowerAlpha(cc)) {
|
|
2307
|
+
// ASCII lower alpha
|
|
2308
|
+
// Append the current input character to the temporary buffer. Emit the
|
|
2309
|
+
// current input character as a character token.
|
|
2310
|
+
tempBuffer += String.fromCharCode(cc);
|
|
2311
|
+
pos++;
|
|
2312
|
+
} else {
|
|
2313
|
+
// Anything else
|
|
2314
|
+
// Reconsume in the script data escaped state.
|
|
2315
|
+
state = STATE_SCRIPT_DATA_ESCAPED;
|
|
2316
|
+
// Reconsume
|
|
2317
|
+
}
|
|
2318
|
+
break;
|
|
2319
|
+
|
|
2320
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-state
|
|
2321
|
+
case STATE_SCRIPT_DATA_DOUBLE_ESCAPED:
|
|
2322
|
+
// Consume the next input character:
|
|
2323
|
+
// U+002D HYPHEN-MINUS (-)
|
|
2324
|
+
// Switch to the script data double escaped dash state. Emit a U+002D
|
|
2325
|
+
// HYPHEN-MINUS character token.
|
|
2326
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
2327
|
+
state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH;
|
|
2328
|
+
pos++;
|
|
2329
|
+
} else if (cc === CC_LESS_THAN) {
|
|
2330
|
+
// U+003C LESS-THAN SIGN (<)
|
|
2331
|
+
// Switch to the script data double escaped less-than sign state. Emit a
|
|
2332
|
+
// U+003C LESS-THAN SIGN character token.
|
|
2333
|
+
state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
|
|
2334
|
+
pos++;
|
|
2335
|
+
} else {
|
|
2336
|
+
// Anything else
|
|
2337
|
+
// Emit the current input character as a character token.
|
|
2338
|
+
pos++;
|
|
2339
|
+
}
|
|
2340
|
+
break;
|
|
2341
|
+
|
|
2342
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-dash-state
|
|
2343
|
+
case STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
|
|
2344
|
+
// Consume the next input character:
|
|
2345
|
+
// U+002D HYPHEN-MINUS (-)
|
|
2346
|
+
// Switch to the script data double escaped dash dash state. Emit a U+002D
|
|
2347
|
+
// HYPHEN-MINUS character token.
|
|
2348
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
2349
|
+
state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH;
|
|
2350
|
+
pos++;
|
|
2351
|
+
} else if (cc === CC_LESS_THAN) {
|
|
2352
|
+
// U+003C LESS-THAN SIGN (<)
|
|
2353
|
+
// Switch to the script data double escaped less-than sign state. Emit a
|
|
2354
|
+
// U+003C LESS-THAN SIGN character token.
|
|
2355
|
+
state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
|
|
2356
|
+
pos++;
|
|
2357
|
+
} else {
|
|
2358
|
+
// Anything else
|
|
2359
|
+
// Switch to the script data double escaped state. Emit the current input
|
|
2360
|
+
// character as a character token.
|
|
2361
|
+
state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED;
|
|
2362
|
+
pos++;
|
|
2363
|
+
}
|
|
2364
|
+
break;
|
|
2365
|
+
|
|
2366
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-dash-dash-state
|
|
2367
|
+
case STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
|
|
2368
|
+
// Consume the next input character:
|
|
2369
|
+
// U+002D HYPHEN-MINUS (-)
|
|
2370
|
+
// Emit a U+002D HYPHEN-MINUS character token.
|
|
2371
|
+
if (cc === CC_HYPHEN_MINUS) {
|
|
2372
|
+
pos++;
|
|
2373
|
+
} else if (cc === CC_LESS_THAN) {
|
|
2374
|
+
// U+003C LESS-THAN SIGN (<)
|
|
2375
|
+
// Switch to the script data double escaped less-than sign state. Emit a
|
|
2376
|
+
// U+003C LESS-THAN SIGN character token.
|
|
2377
|
+
state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
|
|
2378
|
+
pos++;
|
|
2379
|
+
} else if (cc === CC_GREATER_THAN) {
|
|
2380
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
2381
|
+
// Switch to the script data state. Emit a U+003E GREATER-THAN SIGN
|
|
2382
|
+
// character token.
|
|
2383
|
+
state = STATE_SCRIPT_DATA;
|
|
2384
|
+
pos++;
|
|
2385
|
+
} else {
|
|
2386
|
+
// Anything else
|
|
2387
|
+
// Switch to the script data double escaped state. Emit the current input
|
|
2388
|
+
// character as a character token.
|
|
2389
|
+
state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED;
|
|
2390
|
+
pos++;
|
|
2391
|
+
}
|
|
2392
|
+
break;
|
|
2393
|
+
|
|
2394
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-less-than-sign-state
|
|
2395
|
+
case STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
|
|
2396
|
+
// Consume the next input character:
|
|
2397
|
+
// U+002F SOLIDUS (/)
|
|
2398
|
+
// Set the temporary buffer to the empty string. Switch to the script data
|
|
2399
|
+
// double escape end state. Emit a U+002F SOLIDUS character token.
|
|
2400
|
+
if (cc === CC_SOLIDUS) {
|
|
2401
|
+
tempBuffer = "";
|
|
2402
|
+
state = STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END;
|
|
2403
|
+
pos++;
|
|
2404
|
+
} else {
|
|
2405
|
+
// Anything else
|
|
2406
|
+
// Reconsume in the script data double escaped state.
|
|
2407
|
+
state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED;
|
|
2408
|
+
// Reconsume
|
|
2409
|
+
}
|
|
2410
|
+
break;
|
|
2411
|
+
|
|
2412
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-end-state
|
|
2413
|
+
case STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END:
|
|
2414
|
+
// Consume the next input character:
|
|
2415
|
+
// U+0009 CHARACTER TABULATION (tab)
|
|
2416
|
+
// U+000A LINE FEED (LF)
|
|
2417
|
+
// U+000C FORM FEED (FF)
|
|
2418
|
+
// U+0020 SPACE
|
|
2419
|
+
// U+002F SOLIDUS (/)
|
|
2420
|
+
// U+003E GREATER-THAN SIGN (>)
|
|
2421
|
+
// If the temporary buffer is the string "script", then switch to the script
|
|
2422
|
+
// data escaped state. Otherwise, switch to the script data double escaped
|
|
2423
|
+
// state. Emit the current input character as a character token.
|
|
2424
|
+
if (isSpace(cc) || cc === CC_SOLIDUS || cc === CC_GREATER_THAN) {
|
|
2425
|
+
state =
|
|
2426
|
+
tempBuffer === "script"
|
|
2427
|
+
? STATE_SCRIPT_DATA_ESCAPED
|
|
2428
|
+
: STATE_SCRIPT_DATA_DOUBLE_ESCAPED;
|
|
2429
|
+
pos++;
|
|
2430
|
+
} else if (isAsciiUpperAlpha(cc)) {
|
|
2431
|
+
// ASCII upper alpha
|
|
2432
|
+
// Append the lowercase version of the current input character (add 0x0020
|
|
2433
|
+
// to the character's code point) to the temporary buffer. Emit the current
|
|
2434
|
+
// input character as a character token.
|
|
2435
|
+
if (tempBuffer.length < 6) {
|
|
2436
|
+
tempBuffer += String.fromCharCode(cc + 0x20);
|
|
2437
|
+
}
|
|
2438
|
+
pos++;
|
|
2439
|
+
} else if (isAsciiLowerAlpha(cc)) {
|
|
2440
|
+
// ASCII lower alpha
|
|
2441
|
+
// Append the current input character to the temporary buffer. Emit the
|
|
2442
|
+
// current input character as a character token.
|
|
2443
|
+
if (tempBuffer.length < 6) {
|
|
2444
|
+
tempBuffer += String.fromCharCode(cc);
|
|
2445
|
+
}
|
|
2446
|
+
pos++;
|
|
2447
|
+
} else {
|
|
2448
|
+
// Anything else
|
|
2449
|
+
// Reconsume in the script data double escaped state.
|
|
2450
|
+
state = STATE_SCRIPT_DATA_DOUBLE_ESCAPED;
|
|
2451
|
+
// Reconsume
|
|
2452
|
+
}
|
|
2453
|
+
break;
|
|
2454
|
+
|
|
2455
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#plaintext-state
|
|
2456
|
+
case STATE_PLAINTEXT:
|
|
2457
|
+
// Consume the next input character:
|
|
2458
|
+
// Anything else
|
|
2459
|
+
// Emit the current input character as a character token.
|
|
2460
|
+
pos++;
|
|
2461
|
+
break;
|
|
2462
|
+
|
|
2463
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#character-reference-state
|
|
2464
|
+
case STATE_CHARACTER_REFERENCE:
|
|
2465
|
+
// Set the temporary buffer to the empty string. Append a U+0026
|
|
2466
|
+
// AMPERSAND (&) character to the temporary buffer.
|
|
2467
|
+
// Consume the next input character:
|
|
2468
|
+
if (isAsciiAlphanumeric(cc)) {
|
|
2469
|
+
// ASCII alphanumeric
|
|
2470
|
+
// Reconsume in the named character reference state.
|
|
2471
|
+
state = STATE_NAMED_CHARACTER_REFERENCE;
|
|
2472
|
+
// Reconsume
|
|
2473
|
+
} else if (cc === CC_NUMBER_SIGN) {
|
|
2474
|
+
// U+0023 NUMBER SIGN (#)
|
|
2475
|
+
// Append the current input character to the temporary buffer.
|
|
2476
|
+
// Switch to the numeric character reference state.
|
|
2477
|
+
state = STATE_NUMERIC_CHARACTER_REFERENCE;
|
|
2478
|
+
pos++;
|
|
2479
|
+
} else {
|
|
2480
|
+
// Anything else
|
|
2481
|
+
// Flush code points consumed as a character reference.
|
|
2482
|
+
// Reconsume in the return state.
|
|
2483
|
+
state = returnState;
|
|
2484
|
+
// Reconsume
|
|
2485
|
+
}
|
|
2486
|
+
break;
|
|
2487
|
+
|
|
2488
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
|
|
2489
|
+
case STATE_NAMED_CHARACTER_REFERENCE:
|
|
2490
|
+
// Consume the maximum number of characters possible, where the
|
|
2491
|
+
// consumed characters are one of the identifiers in the first
|
|
2492
|
+
// column of the named character references table. Append each
|
|
2493
|
+
// character to the temporary buffer when it's consumed.
|
|
2494
|
+
//
|
|
2495
|
+
// TODO(named-entities): The WHATWG spec requires matching against
|
|
2496
|
+
// the full named character references table (~2,000 entries).
|
|
2497
|
+
// For now, we scan past the entity name without table lookup to keep
|
|
2498
|
+
// the core tokenizer lightweight. A follow-up PR will provide a build-time
|
|
2499
|
+
// script to generate a compact Trie/Map or dynamic import for the full
|
|
2500
|
+
// table, along with the `is_consumed_as_part_of_an_attribute` check.
|
|
2501
|
+
// The semantic fallback for consumers (like HtmlParser) is to use the
|
|
2502
|
+
// minimal `decodeHtmlEntities` utility exported below, which guarantees
|
|
2503
|
+
// correctness for URLs (`&`) and common characters without bundle bloat.
|
|
2504
|
+
namedEntityConsumed = 0;
|
|
2505
|
+
while (
|
|
2506
|
+
pos + namedEntityConsumed < len &&
|
|
2507
|
+
isAsciiAlphanumeric(input.charCodeAt(pos + namedEntityConsumed))
|
|
2508
|
+
) {
|
|
2509
|
+
namedEntityConsumed++;
|
|
2510
|
+
// Safety cap — the longest entity is ~33 chars
|
|
2511
|
+
if (namedEntityConsumed > 33) break;
|
|
2512
|
+
}
|
|
2513
|
+
// Check for trailing semicolon
|
|
2514
|
+
if (
|
|
2515
|
+
pos + namedEntityConsumed < len &&
|
|
2516
|
+
input.charCodeAt(pos + namedEntityConsumed) === CC_SEMICOLON
|
|
2517
|
+
) {
|
|
2518
|
+
namedEntityConsumed++;
|
|
2519
|
+
}
|
|
2520
|
+
if (namedEntityConsumed > 0) {
|
|
2521
|
+
pos += namedEntityConsumed;
|
|
2522
|
+
state = returnState;
|
|
2523
|
+
} else {
|
|
2524
|
+
// No match — flush code points consumed as a character
|
|
2525
|
+
// reference. Switch to the ambiguous ampersand state.
|
|
2526
|
+
state = STATE_AMBIGUOUS_AMPERSAND;
|
|
2527
|
+
}
|
|
2528
|
+
break;
|
|
2529
|
+
|
|
2530
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#ambiguous-ampersand-state
|
|
2531
|
+
case STATE_AMBIGUOUS_AMPERSAND:
|
|
2532
|
+
// Consume the next input character:
|
|
2533
|
+
if (isAsciiAlphanumeric(cc)) {
|
|
2534
|
+
// ASCII alphanumeric
|
|
2535
|
+
// If the character reference was consumed as part of an
|
|
2536
|
+
// attribute, then append the current input character to
|
|
2537
|
+
// the current attribute's value. Otherwise, emit the
|
|
2538
|
+
// current input character as a character token.
|
|
2539
|
+
pos++;
|
|
2540
|
+
} else if (cc === CC_SEMICOLON) {
|
|
2541
|
+
// U+003B SEMICOLON (;)
|
|
2542
|
+
// This is an unknown-named-character-reference parse error.
|
|
2543
|
+
// Reconsume in the return state.
|
|
2544
|
+
state = returnState;
|
|
2545
|
+
// Reconsume
|
|
2546
|
+
} else {
|
|
2547
|
+
// Anything else
|
|
2548
|
+
// Reconsume in the return state.
|
|
2549
|
+
state = returnState;
|
|
2550
|
+
// Reconsume
|
|
2551
|
+
}
|
|
2552
|
+
break;
|
|
2553
|
+
|
|
2554
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-state
|
|
2555
|
+
case STATE_NUMERIC_CHARACTER_REFERENCE:
|
|
2556
|
+
// Set the character reference code to zero (0).
|
|
2557
|
+
// Consume the next input character:
|
|
2558
|
+
if (cc === 0x78 || cc === 0x58) {
|
|
2559
|
+
// U+0078 LATIN SMALL LETTER X
|
|
2560
|
+
// U+0058 LATIN CAPITAL LETTER X
|
|
2561
|
+
// Append the current input character to the temporary
|
|
2562
|
+
// buffer. Switch to the hexadecimal character reference
|
|
2563
|
+
// start state.
|
|
2564
|
+
state = STATE_HEXADECIMAL_CHARACTER_REFERENCE_START;
|
|
2565
|
+
pos++;
|
|
2566
|
+
} else {
|
|
2567
|
+
// Anything else
|
|
2568
|
+
// Reconsume in the decimal character reference start state.
|
|
2569
|
+
state = STATE_DECIMAL_CHARACTER_REFERENCE_START;
|
|
2570
|
+
// Reconsume
|
|
2571
|
+
}
|
|
2572
|
+
break;
|
|
2573
|
+
|
|
2574
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#hexadecimal-character-reference-start-state
|
|
2575
|
+
case STATE_HEXADECIMAL_CHARACTER_REFERENCE_START:
|
|
2576
|
+
// Consume the next input character:
|
|
2577
|
+
// ASCII hex digit: reconsume in the hexadecimal character reference state.
|
|
2578
|
+
// Anything else: absence-of-digits-in-numeric-character-reference parse
|
|
2579
|
+
// error. Flush code points consumed as a character reference. Reconsume
|
|
2580
|
+
// in the return state.
|
|
2581
|
+
state = isAsciiHexDigit(cc)
|
|
2582
|
+
? STATE_HEXADECIMAL_CHARACTER_REFERENCE
|
|
2583
|
+
: returnState;
|
|
2584
|
+
// Reconsume
|
|
2585
|
+
break;
|
|
2586
|
+
|
|
2587
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#decimal-character-reference-start-state
|
|
2588
|
+
case STATE_DECIMAL_CHARACTER_REFERENCE_START:
|
|
2589
|
+
// Consume the next input character:
|
|
2590
|
+
// ASCII digit: reconsume in the decimal character reference state.
|
|
2591
|
+
// Anything else: absence-of-digits-in-numeric-character-reference parse
|
|
2592
|
+
// error. Flush code points consumed as a character reference. Reconsume
|
|
2593
|
+
// in the return state.
|
|
2594
|
+
state = isAsciiDigit(cc)
|
|
2595
|
+
? STATE_DECIMAL_CHARACTER_REFERENCE
|
|
2596
|
+
: returnState;
|
|
2597
|
+
// Reconsume
|
|
2598
|
+
break;
|
|
2599
|
+
|
|
2600
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#hexadecimal-character-reference-state
|
|
2601
|
+
case STATE_HEXADECIMAL_CHARACTER_REFERENCE:
|
|
2602
|
+
// Consume the next input character:
|
|
2603
|
+
if (isAsciiHexDigit(cc)) {
|
|
2604
|
+
// ASCII digit / upper hex / lower hex
|
|
2605
|
+
// Multiply the character reference code by 16. Add a numeric
|
|
2606
|
+
// version of the current input character to the character
|
|
2607
|
+
// reference code.
|
|
2608
|
+
pos++;
|
|
2609
|
+
} else if (cc === CC_SEMICOLON) {
|
|
2610
|
+
// U+003B SEMICOLON
|
|
2611
|
+
// Switch to the numeric character reference end state.
|
|
2612
|
+
state = STATE_NUMERIC_CHARACTER_REFERENCE_END;
|
|
2613
|
+
pos++;
|
|
2614
|
+
} else {
|
|
2615
|
+
// Anything else
|
|
2616
|
+
// This is a missing-semicolon-after-character-reference
|
|
2617
|
+
// parse error. Reconsume in the numeric character reference
|
|
2618
|
+
// end state.
|
|
2619
|
+
state = STATE_NUMERIC_CHARACTER_REFERENCE_END;
|
|
2620
|
+
// Reconsume
|
|
2621
|
+
}
|
|
2622
|
+
break;
|
|
2623
|
+
|
|
2624
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#decimal-character-reference-state
|
|
2625
|
+
case STATE_DECIMAL_CHARACTER_REFERENCE:
|
|
2626
|
+
// Consume the next input character:
|
|
2627
|
+
if (isAsciiDigit(cc)) {
|
|
2628
|
+
// ASCII digit
|
|
2629
|
+
// Multiply the character reference code by 10. Add a numeric
|
|
2630
|
+
// version of the current input character (subtract 0x0030
|
|
2631
|
+
// from the character's code point) to the character reference
|
|
2632
|
+
// code.
|
|
2633
|
+
pos++;
|
|
2634
|
+
} else if (cc === CC_SEMICOLON) {
|
|
2635
|
+
// U+003B SEMICOLON
|
|
2636
|
+
// Switch to the numeric character reference end state.
|
|
2637
|
+
state = STATE_NUMERIC_CHARACTER_REFERENCE_END;
|
|
2638
|
+
pos++;
|
|
2639
|
+
} else {
|
|
2640
|
+
// Anything else
|
|
2641
|
+
// This is a missing-semicolon-after-character-reference
|
|
2642
|
+
// parse error. Reconsume in the numeric character reference
|
|
2643
|
+
// end state.
|
|
2644
|
+
state = STATE_NUMERIC_CHARACTER_REFERENCE_END;
|
|
2645
|
+
// Reconsume
|
|
2646
|
+
}
|
|
2647
|
+
break;
|
|
2648
|
+
|
|
2649
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
|
|
2650
|
+
case STATE_NUMERIC_CHARACTER_REFERENCE_END:
|
|
2651
|
+
// Check the character reference code (validation omitted for
|
|
2652
|
+
// the scanner — we don't decode, just skip past the entity).
|
|
2653
|
+
// Flush code points consumed as a character reference.
|
|
2654
|
+
// Switch to the return state.
|
|
2655
|
+
state = returnState;
|
|
2656
|
+
// Reconsume
|
|
2657
|
+
break;
|
|
2658
|
+
|
|
2659
|
+
default:
|
|
2660
|
+
pos++;
|
|
2661
|
+
}
|
|
2662
|
+
}
|
|
2663
|
+
|
|
2664
|
+
if (
|
|
2665
|
+
(state >= STATE_MARKUP_DECLARATION_OPEN && state <= STATE_BOGUS_COMMENT) ||
|
|
2666
|
+
(state >= STATE_COMMENT_LESS_THAN_SIGN &&
|
|
2667
|
+
state <= STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH) ||
|
|
2668
|
+
(state >= STATE_CDATA_SECTION && state <= STATE_CDATA_SECTION_END)
|
|
2669
|
+
) {
|
|
2670
|
+
if (callbacks.comment !== undefined) {
|
|
2671
|
+
pos = callbacks.comment(input, commentStart, len);
|
|
2672
|
+
}
|
|
2673
|
+
} else if (state >= STATE_DOCTYPE && state <= STATE_BOGUS_DOCTYPE) {
|
|
2674
|
+
if (callbacks.doctype !== undefined) {
|
|
2675
|
+
pos = callbacks.doctype(input, commentStart, len);
|
|
2676
|
+
}
|
|
2677
|
+
} else if (textStart < len && callbacks.text !== undefined) {
|
|
2678
|
+
callbacks.text(input, textStart, len);
|
|
2679
|
+
}
|
|
2680
|
+
|
|
2681
|
+
return pos;
|
|
2682
|
+
};
|
|
2683
|
+
|
|
2684
|
+
// Attach the quote-kind constants to `walkHtmlTokens` itself so that code
// holding a reference to it can read them as `walkHtmlTokens.QUOTE_*`.
// NOTE(review): presumably these are the values passed to attribute
// callbacks to describe how a value was quoted — confirm against the
// constant definitions earlier in the file.
walkHtmlTokens.QUOTE_NONE = QUOTE_NONE;
|
|
2685
|
+
walkHtmlTokens.QUOTE_SINGLE = QUOTE_SINGLE;
|
|
2686
|
+
walkHtmlTokens.QUOTE_DOUBLE = QUOTE_DOUBLE;
|
|
2687
|
+
|
|
2688
|
+
/**
 * Named character references understood by `decodeHtmlEntities`.
 *
 * Keys are matched case-sensitively, as in the WHATWG named character
 * references table. Only `AMP`, `LT`, `GT` and `QUOT` have an all-caps
 * spelling there; mixed-case names such as `Lt` or `Gt` are different
 * entities (U+226A / U+226B) and are intentionally not part of this
 * minimal table.
 */
const MINIMAL_ENTITIES = {
	amp: "&",
	AMP: "&",
	lt: "<",
	LT: "<",
	gt: ">",
	GT: ">",
	quot: '"',
	QUOT: '"',
	apos: "'",
	nbsp: "\u00A0"
};

/**
 * Minimal entity decoder for safe string resolution without bundle bloat.
 *
 * Decodes the named references listed in `MINIMAL_ENTITIES` and all numeric
 * references (`&#65;`, `&#x41;`). The trailing semicolon is optional.
 * Unknown named references and malformed numeric references (`&#;`, `&#x;`)
 * are left untouched so that no characters are silently dropped.
 *
 * Numeric references consume only their digits: `&#65abc;` decodes to
 * `Aabc;`. A code of zero, a surrogate code point, or a value above
 * U+10FFFF decodes to U+FFFD REPLACEMENT CHARACTER. The Windows-1252
 * remapping of the C1 control range is not performed.
 * @param {string} str the raw string from the token slice
 * @returns {string} decoded string
 */
walkHtmlTokens.decodeHtmlEntities = (str) => {
	// Fast path: nothing to decode.
	if (!str.includes("&")) return str;

	return str.replace(
		// 1: hex digits of `&#x…`, 2: decimal digits of `&#…`, 3: entity name.
		// The numeric alternatives stop at the first non-digit, so whatever
		// follows the digits stays in the output instead of being swallowed.
		/&(?:#[xX]([0-9a-fA-F]+)|#([0-9]+)|([0-9a-zA-Z]+));?/g,
		(match, hexDigits, decimalDigits, name) => {
			if (name !== undefined) {
				// Known minimal named reference, e.g. `&amp;`.
				// Unknown names are preserved as literals to avoid data loss.
				return Object.prototype.hasOwnProperty.call(MINIMAL_ENTITIES, name)
					? /** @type {Record<string, string>} */ (MINIMAL_ENTITIES)[name]
					: match;
			}

			// Numeric reference: `&#x41;` (hexadecimal) or `&#65;` (decimal).
			const code =
				hexDigits !== undefined
					? Number.parseInt(hexDigits, 16)
					: Number.parseInt(decimalDigits, 10);

			// NUL, surrogates and out-of-range values have no valid scalar
			// value; very long digit runs parse to a huge number or Infinity
			// and are caught by the upper bound.
			if (code === 0 || code > 0x10ffff || (code >= 0xd800 && code <= 0xdfff)) {
				return "\uFFFD";
			}

			return String.fromCodePoint(code);
		}
	);
};
|
|
2732
|
+
|
|
2733
|
+
module.exports = walkHtmlTokens;
|