nokogiri 1.10.10 → 1.13.9
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +5 -0
- data/LICENSE-DEPENDENCIES.md +1173 -884
- data/LICENSE.md +1 -1
- data/README.md +178 -96
- data/bin/nokogiri +63 -50
- data/dependencies.yml +13 -64
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +761 -424
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +119 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +228 -91
- data/ext/nokogiri/nokogiri.h +199 -88
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +17 -17
- data/ext/nokogiri/xml_attribute_decl.c +21 -21
- data/ext/nokogiri/xml_cdata.c +14 -19
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +296 -220
- data/ext/nokogiri/xml_document_fragment.c +12 -16
- data/ext/nokogiri/xml_dtd.c +64 -58
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +25 -25
- data/ext/nokogiri/xml_encoding_handler.c +43 -18
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +98 -53
- data/ext/nokogiri/xml_node.c +1065 -653
- data/ext/nokogiri/xml_node_set.c +178 -166
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +277 -175
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +112 -112
- data/ext/nokogiri/xml_sax_parser_context.c +112 -86
- data/ext/nokogiri/xml_sax_push_parser.c +36 -27
- data/ext/nokogiri/xml_schema.c +98 -48
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +14 -18
- data/ext/nokogiri/xml_xpath_context.c +226 -115
- data/ext/nokogiri/xslt_stylesheet.c +265 -173
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4875 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -8
- data/lib/nokogiri/css/parser.rb +397 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +54 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +5 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +218 -91
- data/lib/nokogiri/css.rb +50 -17
- data/lib/nokogiri/decorators/slop.rb +9 -7
- data/lib/nokogiri/extension.rb +31 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/{html → html4}/document.rb +103 -105
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +46 -0
- data/lib/nokogiri/html5/document.rb +91 -0
- data/lib/nokogiri/html5/document_fragment.rb +83 -0
- data/lib/nokogiri/html5/node.rb +100 -0
- data/lib/nokogiri/html5.rb +478 -0
- data/lib/nokogiri/jruby/dependencies.rb +21 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +222 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +6 -3
- data/lib/nokogiri/xml/attribute_decl.rb +3 -1
- data/lib/nokogiri/xml/builder.rb +74 -33
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +224 -86
- data/lib/nokogiri/xml/document_fragment.rb +46 -44
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +2 -0
- data/lib/nokogiri/xml/element_decl.rb +3 -1
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +3 -0
- data/lib/nokogiri/xml/node/save_options.rb +10 -5
- data/lib/nokogiri/xml/node.rb +884 -378
- data/lib/nokogiri/xml/node_set.rb +51 -54
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +22 -8
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +25 -26
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +3 -1
- data/lib/nokogiri/xml/reader.rb +21 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +38 -34
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +112 -72
- data/lib/nokogiri/xml/syntax_error.rb +6 -4
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +38 -37
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +29 -20
- data/lib/nokogiri.rb +49 -65
- data/lib/xsd/xmlparser/nokogiri.rb +26 -24
- data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
- data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
- data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
- metadata +189 -142
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -0,0 +1,578 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module HTML4
|
5
|
+
class ElementDescription
|
6
|
+
# Method definitions are guarded by method_defined? because at
|
7
|
+
# this point the C-library or Java library is already loaded,
|
8
|
+
# and we don't want to clobber any methods that have been
|
9
|
+
# defined there.
|
10
|
+
|
11
|
+
Desc = Struct.new("HTMLElementDescription", :name,
|
12
|
+
:startTag, :endTag, :saveEndTag,
|
13
|
+
:empty, :depr, :dtd, :isinline,
|
14
|
+
:desc,
|
15
|
+
:subelts, :defaultsubelt,
|
16
|
+
:attrs_opt, :attrs_depr, :attrs_req)
|
17
|
+
|
18
|
+
# This is filled in down below.
|
19
|
+
DefaultDescriptions = {} # rubocop:disable Naming/ConstantName
|
20
|
+
|
21
|
+
def default_desc
|
22
|
+
DefaultDescriptions[name.downcase]
|
23
|
+
end
|
24
|
+
private :default_desc
|
25
|
+
|
26
|
+
unless method_defined?(:implied_start_tag?)
|
27
|
+
def implied_start_tag?
|
28
|
+
d = default_desc
|
29
|
+
d ? d.startTag : nil
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
unless method_defined?(:implied_end_tag?)
|
34
|
+
def implied_end_tag?
|
35
|
+
d = default_desc
|
36
|
+
d ? d.endTag : nil
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
unless method_defined?(:save_end_tag?)
|
41
|
+
def save_end_tag?
|
42
|
+
d = default_desc
|
43
|
+
d ? d.saveEndTag : nil
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
unless method_defined?(:deprecated?)
|
48
|
+
def deprecated?
|
49
|
+
d = default_desc
|
50
|
+
d ? d.depr : nil
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
unless method_defined?(:description)
|
55
|
+
def description
|
56
|
+
d = default_desc
|
57
|
+
d ? d.desc : nil
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
unless method_defined?(:default_sub_element)
|
62
|
+
def default_sub_element
|
63
|
+
d = default_desc
|
64
|
+
d ? d.defaultsubelt : nil
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
unless method_defined?(:optional_attributes)
|
69
|
+
def optional_attributes
|
70
|
+
d = default_desc
|
71
|
+
d ? d.attrs_opt : []
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
unless method_defined?(:deprecated_attributes)
|
76
|
+
def deprecated_attributes
|
77
|
+
d = default_desc
|
78
|
+
d ? d.attrs_depr : []
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
unless method_defined?(:required_attributes)
|
83
|
+
def required_attributes
|
84
|
+
d = default_desc
|
85
|
+
d ? d.attrs_req : []
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
###
|
90
|
+
# Default Element Descriptions (HTML 4.0) copied from
|
91
|
+
# libxml2/HTMLparser.c and libxml2/include/libxml/HTMLparser.h
|
92
|
+
#
|
93
|
+
# The copyright notice for those files and the following list of
|
94
|
+
# element and attribute descriptions is reproduced here:
|
95
|
+
#
|
96
|
+
# Except where otherwise noted in the source code (e.g. the
|
97
|
+
# files hash.c, list.c and the trio files, which are covered by
|
98
|
+
# a similar licence but with different Copyright notices) all
|
99
|
+
# the files are:
|
100
|
+
#
|
101
|
+
# Copyright (C) 1998-2003 Daniel Veillard. All Rights Reserved.
|
102
|
+
#
|
103
|
+
# Permission is hereby granted, free of charge, to any person
|
104
|
+
# obtaining a copy of this software and associated documentation
|
105
|
+
# files (the "Software"), to deal in the Software without
|
106
|
+
# restriction, including without limitation the rights to use,
|
107
|
+
# copy, modify, merge, publish, distribute, sublicense, and/or
|
108
|
+
# sell copies of the Software, and to permit persons to whom the
|
109
|
+
# Software is furnished to do so, subject to the following
|
110
|
+
# conditions:
|
111
|
+
|
112
|
+
# The above copyright notice and this permission notice shall be
|
113
|
+
# included in all copies or substantial portions of the
|
114
|
+
# Software.
|
115
|
+
|
116
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
|
117
|
+
# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
|
118
|
+
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
119
|
+
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE DANIEL
|
120
|
+
# VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
121
|
+
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
122
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
123
|
+
# OR OTHER DEALINGS IN THE SOFTWARE.
|
124
|
+
|
125
|
+
# Except as contained in this notice, the name of Daniel
|
126
|
+
# Veillard shall not be used in advertising or otherwise to
|
127
|
+
# promote the sale, use or other dealings in this Software
|
128
|
+
# without prior written authorization from him.
|
129
|
+
|
130
|
+
# Attributes defined and categorized
|
131
|
+
FONTSTYLE = ["tt", "i", "b", "u", "s", "strike", "big", "small"]
|
132
|
+
PHRASE = ["em", "strong", "dfn", "code", "samp",
|
133
|
+
"kbd", "var", "cite", "abbr", "acronym",]
|
134
|
+
SPECIAL = ["a", "img", "applet", "embed", "object", "font", "basefont",
|
135
|
+
"br", "script", "map", "q", "sub", "sup", "span", "bdo",
|
136
|
+
"iframe",]
|
137
|
+
PCDATA = []
|
138
|
+
HEADING = ["h1", "h2", "h3", "h4", "h5", "h6"]
|
139
|
+
LIST = ["ul", "ol", "dir", "menu"]
|
140
|
+
FORMCTRL = ["input", "select", "textarea", "label", "button"]
|
141
|
+
BLOCK = [HEADING, LIST, "pre", "p", "dl", "div", "center", "noscript",
|
142
|
+
"noframes", "blockquote", "form", "isindex", "hr", "table",
|
143
|
+
"fieldset", "address",]
|
144
|
+
INLINE = [PCDATA, FONTSTYLE, PHRASE, SPECIAL, FORMCTRL]
|
145
|
+
FLOW = [BLOCK, INLINE]
|
146
|
+
MODIFIER = []
|
147
|
+
EMPTY = []
|
148
|
+
|
149
|
+
HTML_FLOW = FLOW
|
150
|
+
HTML_INLINE = INLINE
|
151
|
+
HTML_PCDATA = PCDATA
|
152
|
+
HTML_CDATA = HTML_PCDATA
|
153
|
+
|
154
|
+
COREATTRS = ["id", "class", "style", "title"]
|
155
|
+
I18N = ["lang", "dir"]
|
156
|
+
EVENTS = ["onclick", "ondblclick", "onmousedown", "onmouseup",
|
157
|
+
"onmouseover", "onmouseout", "onkeypress", "onkeydown",
|
158
|
+
"onkeyup",]
|
159
|
+
ATTRS = [COREATTRS, I18N, EVENTS]
|
160
|
+
CELLHALIGN = ["align", "char", "charoff"]
|
161
|
+
CELLVALIGN = ["valign"]
|
162
|
+
|
163
|
+
HTML_ATTRS = ATTRS
|
164
|
+
CORE_I18N_ATTRS = [COREATTRS, I18N]
|
165
|
+
CORE_ATTRS = COREATTRS
|
166
|
+
I18N_ATTRS = I18N
|
167
|
+
|
168
|
+
A_ATTRS = [ATTRS, "charset", "type", "name",
|
169
|
+
"href", "hreflang", "rel", "rev", "accesskey", "shape",
|
170
|
+
"coords", "tabindex", "onfocus", "onblur",]
|
171
|
+
TARGET_ATTR = ["target"]
|
172
|
+
ROWS_COLS_ATTR = ["rows", "cols"]
|
173
|
+
ALT_ATTR = ["alt"]
|
174
|
+
SRC_ALT_ATTRS = ["src", "alt"]
|
175
|
+
HREF_ATTRS = ["href"]
|
176
|
+
CLEAR_ATTRS = ["clear"]
|
177
|
+
INLINE_P = [INLINE, "p"]
|
178
|
+
|
179
|
+
FLOW_PARAM = [FLOW, "param"]
|
180
|
+
APPLET_ATTRS = [COREATTRS, "codebase",
|
181
|
+
"archive", "alt", "name", "height", "width", "align",
|
182
|
+
"hspace", "vspace",]
|
183
|
+
AREA_ATTRS = ["shape", "coords", "href", "nohref",
|
184
|
+
"tabindex", "accesskey", "onfocus", "onblur",]
|
185
|
+
BASEFONT_ATTRS = ["id", "size", "color", "face"]
|
186
|
+
QUOTE_ATTRS = [ATTRS, "cite"]
|
187
|
+
BODY_CONTENTS = [FLOW, "ins", "del"]
|
188
|
+
BODY_ATTRS = [ATTRS, "onload", "onunload"]
|
189
|
+
BODY_DEPR = ["background", "bgcolor", "text",
|
190
|
+
"link", "vlink", "alink",]
|
191
|
+
BUTTON_ATTRS = [ATTRS, "name", "value", "type",
|
192
|
+
"disabled", "tabindex", "accesskey", "onfocus", "onblur",]
|
193
|
+
|
194
|
+
COL_ATTRS = [ATTRS, "span", "width", CELLHALIGN, CELLVALIGN]
|
195
|
+
COL_ELT = ["col"]
|
196
|
+
EDIT_ATTRS = [ATTRS, "datetime", "cite"]
|
197
|
+
COMPACT_ATTRS = [ATTRS, "compact"]
|
198
|
+
DL_CONTENTS = ["dt", "dd"]
|
199
|
+
COMPACT_ATTR = ["compact"]
|
200
|
+
LABEL_ATTR = ["label"]
|
201
|
+
FIELDSET_CONTENTS = [FLOW, "legend"]
|
202
|
+
FONT_ATTRS = [COREATTRS, I18N, "size", "color", "face"]
|
203
|
+
FORM_CONTENTS = [HEADING, LIST, INLINE, "pre", "p", "div", "center",
|
204
|
+
"noscript", "noframes", "blockquote", "isindex", "hr",
|
205
|
+
"table", "fieldset", "address",]
|
206
|
+
FORM_ATTRS = [ATTRS, "method", "enctype", "accept", "name", "onsubmit",
|
207
|
+
"onreset", "accept-charset",]
|
208
|
+
FRAME_ATTRS = [COREATTRS, "longdesc", "name", "src", "frameborder",
|
209
|
+
"marginwidth", "marginheight", "noresize", "scrolling",]
|
210
|
+
FRAMESET_ATTRS = [COREATTRS, "rows", "cols", "onload", "onunload"]
|
211
|
+
FRAMESET_CONTENTS = ["frameset", "frame", "noframes"]
|
212
|
+
HEAD_ATTRS = [I18N, "profile"]
|
213
|
+
HEAD_CONTENTS = ["title", "isindex", "base", "script", "style", "meta",
|
214
|
+
"link", "object",]
|
215
|
+
HR_DEPR = ["align", "noshade", "size", "width"]
|
216
|
+
VERSION_ATTR = ["version"]
|
217
|
+
HTML_CONTENT = ["head", "body", "frameset"]
|
218
|
+
IFRAME_ATTRS = [COREATTRS, "longdesc", "name", "src", "frameborder",
|
219
|
+
"marginwidth", "marginheight", "scrolling", "align",
|
220
|
+
"height", "width",]
|
221
|
+
IMG_ATTRS = [ATTRS, "longdesc", "name", "height", "width", "usemap",
|
222
|
+
"ismap",]
|
223
|
+
EMBED_ATTRS = [COREATTRS, "align", "alt", "border", "code", "codebase",
|
224
|
+
"frameborder", "height", "hidden", "hspace", "name",
|
225
|
+
"palette", "pluginspace", "pluginurl", "src", "type",
|
226
|
+
"units", "vspace", "width",]
|
227
|
+
INPUT_ATTRS = [ATTRS, "type", "name", "value", "checked", "disabled",
|
228
|
+
"readonly", "size", "maxlength", "src", "alt", "usemap",
|
229
|
+
"ismap", "tabindex", "accesskey", "onfocus", "onblur",
|
230
|
+
"onselect", "onchange", "accept",]
|
231
|
+
PROMPT_ATTRS = [COREATTRS, I18N, "prompt"]
|
232
|
+
LABEL_ATTRS = [ATTRS, "for", "accesskey", "onfocus", "onblur"]
|
233
|
+
LEGEND_ATTRS = [ATTRS, "accesskey"]
|
234
|
+
ALIGN_ATTR = ["align"]
|
235
|
+
LINK_ATTRS = [ATTRS, "charset", "href", "hreflang", "type", "rel", "rev",
|
236
|
+
"media",]
|
237
|
+
MAP_CONTENTS = [BLOCK, "area"]
|
238
|
+
NAME_ATTR = ["name"]
|
239
|
+
ACTION_ATTR = ["action"]
|
240
|
+
BLOCKLI_ELT = [BLOCK, "li"]
|
241
|
+
META_ATTRS = [I18N, "http-equiv", "name", "scheme"]
|
242
|
+
CONTENT_ATTR = ["content"]
|
243
|
+
TYPE_ATTR = ["type"]
|
244
|
+
NOFRAMES_CONTENT = ["body", FLOW, MODIFIER]
|
245
|
+
OBJECT_CONTENTS = [FLOW, "param"]
|
246
|
+
OBJECT_ATTRS = [ATTRS, "declare", "classid", "codebase", "data", "type",
|
247
|
+
"codetype", "archive", "standby", "height", "width",
|
248
|
+
"usemap", "name", "tabindex",]
|
249
|
+
OBJECT_DEPR = ["align", "border", "hspace", "vspace"]
|
250
|
+
OL_ATTRS = ["type", "compact", "start"]
|
251
|
+
OPTION_ELT = ["option"]
|
252
|
+
OPTGROUP_ATTRS = [ATTRS, "disabled"]
|
253
|
+
OPTION_ATTRS = [ATTRS, "disabled", "label", "selected", "value"]
|
254
|
+
PARAM_ATTRS = ["id", "value", "valuetype", "type"]
|
255
|
+
WIDTH_ATTR = ["width"]
|
256
|
+
PRE_CONTENT = [PHRASE, "tt", "i", "b", "u", "s", "strike", "a", "br",
|
257
|
+
"script", "map", "q", "span", "bdo", "iframe",]
|
258
|
+
SCRIPT_ATTRS = ["charset", "src", "defer", "event", "for"]
|
259
|
+
LANGUAGE_ATTR = ["language"]
|
260
|
+
SELECT_CONTENT = ["optgroup", "option"]
|
261
|
+
SELECT_ATTRS = [ATTRS, "name", "size", "multiple", "disabled", "tabindex",
|
262
|
+
"onfocus", "onblur", "onchange",]
|
263
|
+
STYLE_ATTRS = [I18N, "media", "title"]
|
264
|
+
TABLE_ATTRS = [ATTRS, "summary", "width", "border", "frame", "rules",
|
265
|
+
"cellspacing", "cellpadding", "datapagesize",]
|
266
|
+
TABLE_DEPR = ["align", "bgcolor"]
|
267
|
+
TABLE_CONTENTS = ["caption", "col", "colgroup", "thead", "tfoot", "tbody",
|
268
|
+
"tr",]
|
269
|
+
TR_ELT = ["tr"]
|
270
|
+
TALIGN_ATTRS = [ATTRS, CELLHALIGN, CELLVALIGN]
|
271
|
+
TH_TD_DEPR = ["nowrap", "bgcolor", "width", "height"]
|
272
|
+
TH_TD_ATTR = [ATTRS, "abbr", "axis", "headers", "scope", "rowspan",
|
273
|
+
"colspan", CELLHALIGN, CELLVALIGN,]
|
274
|
+
TEXTAREA_ATTRS = [ATTRS, "name", "disabled", "readonly", "tabindex",
|
275
|
+
"accesskey", "onfocus", "onblur", "onselect",
|
276
|
+
"onchange",]
|
277
|
+
TR_CONTENTS = ["th", "td"]
|
278
|
+
BGCOLOR_ATTR = ["bgcolor"]
|
279
|
+
LI_ELT = ["li"]
|
280
|
+
UL_DEPR = ["type", "compact"]
|
281
|
+
DIR_ATTR = ["dir"]
|
282
|
+
|
283
|
+
[
|
284
|
+
["a", false, false, false, false, false, :any, true,
|
285
|
+
"anchor ",
|
286
|
+
HTML_INLINE, nil, A_ATTRS, TARGET_ATTR, [],],
|
287
|
+
["abbr", false, false, false, false, false, :any, true,
|
288
|
+
"abbreviated form",
|
289
|
+
HTML_INLINE, nil, HTML_ATTRS, [], [],],
|
290
|
+
["acronym", false, false, false, false, false, :any, true, "",
|
291
|
+
HTML_INLINE, nil, HTML_ATTRS, [], [],],
|
292
|
+
["address", false, false, false, false, false, :any, false,
|
293
|
+
"information on author",
|
294
|
+
INLINE_P, nil, HTML_ATTRS, [], [],],
|
295
|
+
["applet", false, false, false, false, true, :loose, true,
|
296
|
+
"java applet ",
|
297
|
+
FLOW_PARAM, nil, [], APPLET_ATTRS, [],],
|
298
|
+
["area", false, true, true, true, false, :any, false,
|
299
|
+
"client-side image map area ",
|
300
|
+
EMPTY, nil, AREA_ATTRS, TARGET_ATTR, ALT_ATTR,],
|
301
|
+
["b", false, true, false, false, false, :any, true,
|
302
|
+
"bold text style",
|
303
|
+
HTML_INLINE, nil, HTML_ATTRS, [], [],],
|
304
|
+
["base", false, true, true, true, false, :any, false,
|
305
|
+
"document base uri ",
|
306
|
+
EMPTY, nil, [], TARGET_ATTR, HREF_ATTRS,],
|
307
|
+
["basefont", false, true, true, true, true, :loose, true,
|
308
|
+
"base font size ",
|
309
|
+
EMPTY, nil, [], BASEFONT_ATTRS, [],],
|
310
|
+
["bdo", false, false, false, false, false, :any, true,
|
311
|
+
"i18n bidi over-ride ",
|
312
|
+
HTML_INLINE, nil, CORE_I18N_ATTRS, [], DIR_ATTR,],
|
313
|
+
["big", false, true, false, false, false, :any, true,
|
314
|
+
"large text style",
|
315
|
+
HTML_INLINE, nil, HTML_ATTRS, [], [],],
|
316
|
+
["blockquote", false, false, false, false, false, :any, false,
|
317
|
+
"long quotation ",
|
318
|
+
HTML_FLOW, nil, QUOTE_ATTRS, [], [],],
|
319
|
+
["body", true, true, false, false, false, :any, false,
|
320
|
+
"document body ",
|
321
|
+
BODY_CONTENTS, "div", BODY_ATTRS, BODY_DEPR, [],],
|
322
|
+
["br", false, true, true, true, false, :any, true,
|
323
|
+
"forced line break ",
|
324
|
+
EMPTY, nil, CORE_ATTRS, CLEAR_ATTRS, [],],
|
325
|
+
["button", false, false, false, false, false, :any, true,
|
326
|
+
"push button ",
|
327
|
+
[HTML_FLOW, MODIFIER], nil, BUTTON_ATTRS, [], [],],
|
328
|
+
["caption", false, false, false, false, false, :any, false,
|
329
|
+
"table caption ",
|
330
|
+
HTML_INLINE, nil, HTML_ATTRS, [], [],],
|
331
|
+
["center", false, true, false, false, true, :loose, false,
|
332
|
+
"shorthand for div align=center ",
|
333
|
+
HTML_FLOW, nil, [], HTML_ATTRS, [],],
|
334
|
+
["cite", false, false, false, false, false, :any, true, "citation",
|
335
|
+
HTML_INLINE, nil, HTML_ATTRS, [], [],],
|
336
|
+
["code", false, false, false, false, false, :any, true,
|
337
|
+
"computer code fragment",
|
338
|
+
HTML_INLINE, nil, HTML_ATTRS, [], [],],
|
339
|
+
["col", false, true, true, true, false, :any, false, "table column ",
|
340
|
+
EMPTY, nil, COL_ATTRS, [], [],],
|
341
|
+
["colgroup", false, true, false, false, false, :any, false,
|
342
|
+
"table column group ",
|
343
|
+
COL_ELT, "col", COL_ATTRS, [], [],],
|
344
|
+
["dd", false, true, false, false, false, :any, false,
|
345
|
+
"definition description ",
|
346
|
+
HTML_FLOW, nil, HTML_ATTRS, [], [],],
|
347
|
+
["del", false, false, false, false, false, :any, true,
|
348
|
+
"deleted text ",
|
349
|
+
HTML_FLOW, nil, EDIT_ATTRS, [], [],],
|
350
|
+
["dfn", false, false, false, false, false, :any, true,
|
351
|
+
"instance definition",
|
352
|
+
HTML_INLINE, nil, HTML_ATTRS, [], [],],
|
353
|
+
["dir", false, false, false, false, true, :loose, false,
|
354
|
+
"directory list",
|
355
|
+
BLOCKLI_ELT, "li", [], COMPACT_ATTRS, [],],
|
356
|
+
["div", false, false, false, false, false, :any, false,
|
357
|
+
"generic language/style container",
|
358
|
+
HTML_FLOW, nil, HTML_ATTRS, ALIGN_ATTR, [],],
|
359
|
+
["dl", false, false, false, false, false, :any, false,
|
360
|
+
"definition list ",
|
361
|
+
DL_CONTENTS, "dd", HTML_ATTRS, COMPACT_ATTR, [],],
|
362
|
+
["dt", false, true, false, false, false, :any, false,
|
363
|
+
"definition term ",
|
364
|
+
HTML_INLINE, nil, HTML_ATTRS, [], [],],
|
365
|
+
["em", false, true, false, false, false, :any, true,
|
366
|
+
"emphasis",
|
367
|
+
HTML_INLINE, nil, HTML_ATTRS, [], [],],
|
368
|
+
["embed", false, true, false, false, true, :loose, true,
|
369
|
+
"generic embedded object ",
|
370
|
+
EMPTY, nil, EMBED_ATTRS, [], [],],
|
371
|
+
["fieldset", false, false, false, false, false, :any, false,
|
372
|
+
"form control group ",
|
373
|
+
FIELDSET_CONTENTS, nil, HTML_ATTRS, [], [],],
|
374
|
+
["font", false, true, false, false, true, :loose, true,
|
375
|
+
"local change to font ",
|
376
|
+
HTML_INLINE, nil, [], FONT_ATTRS, [],],
|
377
|
+
["form", false, false, false, false, false, :any, false,
|
378
|
+
"interactive form ",
|
379
|
+
FORM_CONTENTS, "fieldset", FORM_ATTRS, TARGET_ATTR, ACTION_ATTR,],
|
380
|
+
["frame", false, true, true, true, false, :frameset, false,
|
381
|
+
"subwindow ",
|
382
|
+
EMPTY, nil, [], FRAME_ATTRS, [],],
|
383
|
+
["frameset", false, false, false, false, false, :frameset, false,
|
384
|
+
"window subdivision",
|
385
|
+
FRAMESET_CONTENTS, "noframes", [], FRAMESET_ATTRS, [],],
|
386
|
+
["htrue", false, false, false, false, false, :any, false,
|
387
|
+
"heading ",
|
388
|
+
HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, [],],
|
389
|
+
["htrue", false, false, false, false, false, :any, false,
|
390
|
+
"heading ",
|
391
|
+
HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, [],],
|
392
|
+
["htrue", false, false, false, false, false, :any, false,
|
393
|
+
"heading ",
|
394
|
+
HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, [],],
|
395
|
+
["h4", false, false, false, false, false, :any, false,
|
396
|
+
"heading ",
|
397
|
+
HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, [],],
|
398
|
+
["h5", false, false, false, false, false, :any, false,
|
399
|
+
"heading ",
|
400
|
+
HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, [],],
|
401
|
+
["h6", false, false, false, false, false, :any, false,
|
402
|
+
"heading ",
|
403
|
+
HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, [],],
|
404
|
+
["head", true, true, false, false, false, :any, false,
|
405
|
+
"document head ",
|
406
|
+
HEAD_CONTENTS, nil, HEAD_ATTRS, [], [],],
|
407
|
+
["hr", false, true, true, true, false, :any, false,
|
408
|
+
"horizontal rule ",
|
409
|
+
EMPTY, nil, HTML_ATTRS, HR_DEPR, [],],
|
410
|
+
["html", true, true, false, false, false, :any, false,
|
411
|
+
"document root element ",
|
412
|
+
HTML_CONTENT, nil, I18N_ATTRS, VERSION_ATTR, [],],
|
413
|
+
["i", false, true, false, false, false, :any, true,
|
414
|
+
"italic text style",
|
415
|
+
HTML_INLINE, nil, HTML_ATTRS, [], [],],
|
416
|
+
["iframe", false, false, false, false, false, :any, true,
|
417
|
+
"inline subwindow ",
|
418
|
+
HTML_FLOW, nil, [], IFRAME_ATTRS, [],],
|
419
|
+
["img", false, true, true, true, false, :any, true,
|
420
|
+
"embedded image ",
|
421
|
+
EMPTY, nil, IMG_ATTRS, ALIGN_ATTR, SRC_ALT_ATTRS,],
|
422
|
+
["input", false, true, true, true, false, :any, true,
|
423
|
+
"form control ",
|
424
|
+
EMPTY, nil, INPUT_ATTRS, ALIGN_ATTR, [],],
|
425
|
+
["ins", false, false, false, false, false, :any, true,
|
426
|
+
"inserted text",
|
427
|
+
HTML_FLOW, nil, EDIT_ATTRS, [], [],],
|
428
|
+
["isindex", false, true, true, true, true, :loose, false,
|
429
|
+
"single line prompt ",
|
430
|
+
EMPTY, nil, [], PROMPT_ATTRS, [],],
|
431
|
+
["kbd", false, false, false, false, false, :any, true,
|
432
|
+
"text to be entered by the user",
|
433
|
+
HTML_INLINE, nil, HTML_ATTRS, [], [],],
|
434
|
+
["label", false, false, false, false, false, :any, true,
|
435
|
+
"form field label text ",
|
436
|
+
[HTML_INLINE, MODIFIER], nil, LABEL_ATTRS, [], [],],
|
437
|
+
["legend", false, false, false, false, false, :any, false,
|
438
|
+
"fieldset legend ",
|
439
|
+
HTML_INLINE, nil, LEGEND_ATTRS, ALIGN_ATTR, [],],
|
440
|
+
["li", false, true, true, false, false, :any, false,
|
441
|
+
"list item ",
|
442
|
+
HTML_FLOW, nil, HTML_ATTRS, [], [],],
|
443
|
+
["link", false, true, true, true, false, :any, false,
|
444
|
+
"a media-independent link ",
|
445
|
+
EMPTY, nil, LINK_ATTRS, TARGET_ATTR, [],],
|
446
|
+
["map", false, false, false, false, false, :any, true,
|
447
|
+
"client-side image map ",
|
448
|
+
MAP_CONTENTS, nil, HTML_ATTRS, [], NAME_ATTR,],
|
449
|
+
["menu", false, false, false, false, true, :loose, false,
|
450
|
+
"menu list ",
|
451
|
+
BLOCKLI_ELT, nil, [], COMPACT_ATTRS, [],],
|
452
|
+
["meta", false, true, true, true, false, :any, false,
|
453
|
+
"generic metainformation ",
|
454
|
+
EMPTY, nil, META_ATTRS, [], CONTENT_ATTR,],
|
455
|
+
["noframes", false, false, false, false, false, :frameset, false,
|
456
|
+
"alternate content container for non frame-based rendering ",
|
457
|
+
NOFRAMES_CONTENT, "body", HTML_ATTRS, [], [],],
|
458
|
+
["noscript", false, false, false, false, false, :any, false,
|
459
|
+
"alternate content container for non script-based rendering ",
|
460
|
+
HTML_FLOW, "div", HTML_ATTRS, [], [],],
|
461
|
+
["object", false, false, false, false, false, :any, true,
|
462
|
+
"generic embedded object ",
|
463
|
+
OBJECT_CONTENTS, "div", OBJECT_ATTRS, OBJECT_DEPR, [],],
|
464
|
+
["ol", false, false, false, false, false, :any, false,
|
465
|
+
"ordered list ",
|
466
|
+
LI_ELT, "li", HTML_ATTRS, OL_ATTRS, [],],
|
467
|
+
["optgroup", false, false, false, false, false, :any, false,
|
468
|
+
"option group ",
|
469
|
+
OPTION_ELT, "option", OPTGROUP_ATTRS, [], LABEL_ATTR,],
|
470
|
+
["option", false, true, false, false, false, :any, false,
|
471
|
+
"selectable choice ",
|
472
|
+
HTML_PCDATA, nil, OPTION_ATTRS, [], [],],
|
473
|
+
["p", false, true, false, false, false, :any, false,
|
474
|
+
"paragraph ",
|
475
|
+
HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, [],],
|
476
|
+
["param", false, true, true, true, false, :any, false,
|
477
|
+
"named property value ",
|
478
|
+
EMPTY, nil, PARAM_ATTRS, [], NAME_ATTR,],
|
479
|
+
["pre", false, false, false, false, false, :any, false,
|
480
|
+
"preformatted text ",
|
481
|
+
PRE_CONTENT, nil, HTML_ATTRS, WIDTH_ATTR, [],],
|
482
|
+
["q", false, false, false, false, false, :any, true,
|
483
|
+
"short inline quotation ",
|
484
|
+
HTML_INLINE, nil, QUOTE_ATTRS, [], [],],
|
485
|
+
["s", false, true, false, false, true, :loose, true,
|
486
|
+
"strike-through text style",
|
487
|
+
HTML_INLINE, nil, [], HTML_ATTRS, [],],
|
488
|
+
["samp", false, false, false, false, false, :any, true,
|
489
|
+
"sample program output, scripts, etc.",
|
490
|
+
HTML_INLINE, nil, HTML_ATTRS, [], [],],
|
491
|
+
["script", false, false, false, false, false, :any, true,
|
492
|
+
"script statements ",
|
493
|
+
HTML_CDATA, nil, SCRIPT_ATTRS, LANGUAGE_ATTR, TYPE_ATTR,],
|
494
|
+
["select", false, false, false, false, false, :any, true,
|
495
|
+
"option selector ",
|
496
|
+
SELECT_CONTENT, nil, SELECT_ATTRS, [], [],],
|
497
|
+
["small", false, true, false, false, false, :any, true,
|
498
|
+
"small text style",
|
499
|
+
HTML_INLINE, nil, HTML_ATTRS, [], [],],
|
500
|
+
["span", false, false, false, false, false, :any, true,
|
501
|
+
"generic language/style container ",
|
502
|
+
HTML_INLINE, nil, HTML_ATTRS, [], [],],
|
503
|
+
["strike", false, true, false, false, true, :loose, true,
|
504
|
+
"strike-through text",
|
505
|
+
HTML_INLINE, nil, [], HTML_ATTRS, [],],
|
506
|
+
["strong", false, true, false, false, false, :any, true,
|
507
|
+
"strong emphasis",
|
508
|
+
HTML_INLINE, nil, HTML_ATTRS, [], [],],
|
509
|
+
["style", false, false, false, false, false, :any, false,
|
510
|
+
"style info ",
|
511
|
+
HTML_CDATA, nil, STYLE_ATTRS, [], TYPE_ATTR,],
|
512
|
+
["sub", false, true, false, false, false, :any, true,
|
513
|
+
"subscript",
|
514
|
+
HTML_INLINE, nil, HTML_ATTRS, [], [],],
|
515
|
+
["sup", false, true, false, false, false, :any, true,
|
516
|
+
"superscript ",
|
517
|
+
HTML_INLINE, nil, HTML_ATTRS, [], [],],
|
518
|
+
["table", false, false, false, false, false, :any, false,
|
519
|
+
"",
|
520
|
+
TABLE_CONTENTS, "tr", TABLE_ATTRS, TABLE_DEPR, [],],
|
521
|
+
["tbody", true, false, false, false, false, :any, false,
|
522
|
+
"table body ",
|
523
|
+
TR_ELT, "tr", TALIGN_ATTRS, [], [],],
|
524
|
+
["td", false, false, false, false, false, :any, false,
|
525
|
+
"table data cell",
|
526
|
+
HTML_FLOW, nil, TH_TD_ATTR, TH_TD_DEPR, [],],
|
527
|
+
["textarea", false, false, false, false, false, :any, true,
|
528
|
+
"multi-line text field ",
|
529
|
+
HTML_PCDATA, nil, TEXTAREA_ATTRS, [], ROWS_COLS_ATTR,],
|
530
|
+
["tfoot", false, true, false, false, false, :any, false,
|
531
|
+
"table footer ",
|
532
|
+
TR_ELT, "tr", TALIGN_ATTRS, [], [],],
|
533
|
+
["th", false, true, false, false, false, :any, false,
|
534
|
+
"table header cell",
|
535
|
+
HTML_FLOW, nil, TH_TD_ATTR, TH_TD_DEPR, [],],
|
536
|
+
["thead", false, true, false, false, false, :any, false,
|
537
|
+
"table header ",
|
538
|
+
TR_ELT, "tr", TALIGN_ATTRS, [], [],],
|
539
|
+
["title", false, false, false, false, false, :any, false,
|
540
|
+
"document title ",
|
541
|
+
HTML_PCDATA, nil, I18N_ATTRS, [], [],],
|
542
|
+
["tr", false, false, false, false, false, :any, false,
|
543
|
+
"table row ",
|
544
|
+
TR_CONTENTS, "td", TALIGN_ATTRS, BGCOLOR_ATTR, [],],
|
545
|
+
["tt", false, true, false, false, false, :any, true,
|
546
|
+
"teletype or monospaced text style",
|
547
|
+
HTML_INLINE, nil, HTML_ATTRS, [], [],],
|
548
|
+
["u", false, true, false, false, true, :loose, true,
|
549
|
+
"underlined text style",
|
550
|
+
HTML_INLINE, nil, [], HTML_ATTRS, [],],
|
551
|
+
["ul", false, false, false, false, false, :any, false,
|
552
|
+
"unordered list ",
|
553
|
+
LI_ELT, "li", HTML_ATTRS, UL_DEPR, [],],
|
554
|
+
["var", false, false, false, false, false, :any, true,
|
555
|
+
"instance of a variable or program argument",
|
556
|
+
HTML_INLINE, nil, HTML_ATTRS, [], [],],
|
557
|
+
].each do |descriptor|
|
558
|
+
name = descriptor[0]
|
559
|
+
|
560
|
+
begin
|
561
|
+
d = Desc.new(*descriptor)
|
562
|
+
|
563
|
+
# flatten all the attribute lists (Ruby1.9, *[a,b,c] can be
|
564
|
+
# used to flatten a literal list, but not in Ruby1.8).
|
565
|
+
d[:subelts] = d[:subelts].flatten
|
566
|
+
d[:attrs_opt] = d[:attrs_opt].flatten
|
567
|
+
d[:attrs_depr] = d[:attrs_depr].flatten
|
568
|
+
d[:attrs_req] = d[:attrs_req].flatten
|
569
|
+
rescue => e
|
570
|
+
p(name)
|
571
|
+
raise e
|
572
|
+
end
|
573
|
+
|
574
|
+
DefaultDescriptions[name] = d
|
575
|
+
end
|
576
|
+
end
|
577
|
+
end
|
578
|
+
end
|
@@ -1,11 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Nokogiri
|
2
|
-
module
|
4
|
+
module HTML4
|
3
5
|
# Struct-backed record with three members, in order: +value+, +name+ and
# +description+.
class EntityDescription < Struct.new(:value, :name, :description)
end
|
4
6
|
|
5
7
|
class EntityLookup
  ###
  # Look up entity with +name+
  #
  # Delegates to #get (not defined in this file) and returns the +value+ of
  # whatever it finds. When #get returns nil or false, that falsy result is
  # returned unchanged.
  def [](name)
    entity = get(name)
    entity && entity.value
  end
end
|
@@ -1,17 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Nokogiri
|
2
|
-
module
|
4
|
+
module HTML4
|
3
5
|
###
|
4
|
-
# Nokogiri lets you write a SAX parser to process HTML but get HTML
|
5
|
-
# correction features.
|
6
|
+
# Nokogiri lets you write a SAX parser to process HTML but get HTML correction features.
|
6
7
|
#
|
7
|
-
# See Nokogiri::
|
8
|
-
# SAX parser with HTML.
|
8
|
+
# See Nokogiri::HTML4::SAX::Parser for a basic example of using a SAX parser with HTML.
|
9
9
|
#
|
10
10
|
# For more information on SAX parsers, see Nokogiri::XML::SAX
|
11
11
|
module SAX
|
12
12
|
###
|
13
|
-
# This class lets you perform SAX style parsing on HTML with HTML
|
14
|
-
# error correction.
|
13
|
+
# This class lets you perform SAX style parsing on HTML with HTML error correction.
|
15
14
|
#
|
16
15
|
# Here is a basic usage example:
|
17
16
|
#
|
@@ -21,40 +20,42 @@ module Nokogiri
|
|
21
20
|
# end
|
22
21
|
# end
|
23
22
|
#
|
24
|
-
# parser = Nokogiri::
|
23
|
+
# parser = Nokogiri::HTML4::SAX::Parser.new(MyDoc.new)
|
25
24
|
# parser.parse(File.read(ARGV[0], mode: 'rb'))
|
26
25
|
#
|
27
26
|
# For more information on SAX parsers, see Nokogiri::XML::SAX
|
28
27
|
class Parser < Nokogiri::XML::SAX::Parser
|
29
28
|
###
|
30
29
|
# Parse html stored in +data+ using +encoding+
|
31
|
-
def parse_memory
|
32
|
-
raise
|
33
|
-
return
|
30
|
+
# Parse the HTML held in +data+, passing +encoding+ on to the parser context.
#
# When a block is given, the newly built ParserContext is yielded to it
# before parsing starts.
#
# Raises TypeError when +data+ is not a String. Returns nil without parsing
# anything when +data+ is empty.
def parse_memory(data, encoding = "UTF-8")
  # A message makes the failure diagnosable without reading the backtrace.
  raise TypeError, "data must be a String" unless String === data
  return if data.empty?

  ctx = ParserContext.memory(data, encoding)
  yield ctx if block_given?
  ctx.parse_with(self)
end
|
38
38
|
|
39
39
|
###
|
40
40
|
# Parse given +io+
|
41
|
-
def parse_io
|
41
|
+
# Parse HTML read from +io+.
#
# +encoding+ is first passed to check_encoding (defined elsewhere;
# presumably it rejects unsupported names, TODO confirm), then remembered
# in @encoding and translated through ENCODINGS for the parser context.
# When a block is given, the context is yielded before parsing starts.
def parse_io(io, encoding = "UTF-8")
  check_encoding(encoding)
  @encoding = encoding

  context = ParserContext.io(io, ENCODINGS[encoding])
  yield(context) if block_given?
  context.parse_with(self)
end
|
48
48
|
|
49
49
|
###
|
50
50
|
# Parse a file with +filename+
|
51
|
-
def parse_file
|
51
|
+
# Parse the HTML file at +filename+, passing +encoding+ on to the parser
# context.
#
# When a block is given, the newly built ParserContext is yielded to it
# before parsing starts.
#
# Raises ArgumentError when +filename+ is nil or false, Errno::ENOENT when
# nothing exists at that path, and Errno::EISDIR when the path is a
# directory.
def parse_file(filename, encoding = "UTF-8")
  raise ArgumentError, "filename is required" unless filename
  # Pass the path to the Errno classes so it appears in the error message.
  # to_s keeps non-String paths (e.g. Pathname) from raising a TypeError here.
  raise Errno::ENOENT, filename.to_s unless File.exist?(filename)
  raise Errno::EISDIR, filename.to_s if File.directory?(filename)

  ctx = ParserContext.file(filename, encoding)
  yield ctx if block_given?
  ctx.parse_with(self)
end
|
59
60
|
end
|
60
61
|
end
|