html5 0.1.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +9 -2
- data/Manifest.txt +61 -2
- data/README +41 -5
- data/Rakefile.rb +22 -6
- data/{parse.rb → bin/html5} +11 -11
- data/lib/core_ext/string.rb +17 -0
- data/lib/html5/constants.rb +228 -0
- data/lib/html5/filters/iso639codes.rb +752 -0
- data/lib/html5/filters/rfc2046.rb +30 -0
- data/lib/html5/filters/rfc3987.rb +89 -0
- data/lib/html5/filters/validator.rb +830 -0
- data/lib/html5/html5parser.rb +25 -25
- data/lib/html5/html5parser/after_body_phase.rb +3 -3
- data/lib/html5/html5parser/after_frameset_phase.rb +3 -4
- data/lib/html5/html5parser/after_head_phase.rb +6 -6
- data/lib/html5/html5parser/before_head_phase.rb +1 -1
- data/lib/html5/html5parser/in_body_phase.rb +54 -48
- data/lib/html5/html5parser/in_caption_phase.rb +7 -6
- data/lib/html5/html5parser/in_cell_phase.rb +3 -3
- data/lib/html5/html5parser/in_column_group_phase.rb +1 -1
- data/lib/html5/html5parser/in_frameset_phase.rb +5 -5
- data/lib/html5/html5parser/in_head_phase.rb +10 -10
- data/lib/html5/html5parser/in_row_phase.rb +4 -2
- data/lib/html5/html5parser/in_select_phase.rb +7 -6
- data/lib/html5/html5parser/in_table_body_phase.rb +8 -5
- data/lib/html5/html5parser/in_table_phase.rb +12 -7
- data/lib/html5/html5parser/initial_phase.rb +5 -6
- data/lib/html5/html5parser/phase.rb +5 -9
- data/lib/html5/html5parser/root_element_phase.rb +1 -2
- data/lib/html5/html5parser/trailing_end_phase.rb +3 -3
- data/lib/html5/inputstream.rb +25 -31
- data/lib/html5/liberalxmlparser.rb +2 -2
- data/lib/html5/sanitizer.rb +6 -6
- data/lib/html5/serializer/htmlserializer.rb +2 -3
- data/lib/html5/sniffer.rb +45 -0
- data/lib/html5/tokenizer.rb +57 -59
- data/lib/html5/treebuilders/rexml.rb +7 -6
- data/lib/html5/treebuilders/simpletree.rb +1 -1
- data/lib/html5/treewalkers/base.rb +8 -0
- data/lib/html5/version.rb +3 -0
- data/testdata/encoding/chardet/test_big5.txt +51 -0
- data/testdata/encoding/test-yahoo-jp.dat +10 -0
- data/testdata/encoding/tests1.dat +394 -0
- data/testdata/encoding/tests2.dat +81 -0
- data/testdata/sanitizer/tests1.dat +416 -0
- data/testdata/serializer/core.test +104 -0
- data/testdata/serializer/injectmeta.test +65 -0
- data/testdata/serializer/optionaltags.test +900 -0
- data/testdata/serializer/options.test +60 -0
- data/testdata/serializer/whitespace.test +51 -0
- data/testdata/sites/google-results.htm +1 -0
- data/testdata/sites/python-ref-import.htm +1 -0
- data/testdata/sites/web-apps-old.htm +1 -0
- data/testdata/sites/web-apps.htm +34275 -0
- data/testdata/sniffer/htmlOrFeed.json +43 -0
- data/testdata/tokenizer/contentModelFlags.test +48 -0
- data/testdata/tokenizer/entities.test +2339 -0
- data/testdata/tokenizer/escapeFlag.test +21 -0
- data/testdata/tokenizer/test1.test +172 -0
- data/testdata/tokenizer/test2.test +129 -0
- data/testdata/tokenizer/test3.test +367 -0
- data/testdata/tokenizer/test4.test +198 -0
- data/testdata/tree-construction/tests1.dat +1950 -0
- data/testdata/tree-construction/tests2.dat +773 -0
- data/testdata/tree-construction/tests3.dat +270 -0
- data/testdata/tree-construction/tests4.dat +60 -0
- data/testdata/tree-construction/tests5.dat +175 -0
- data/testdata/tree-construction/tests6.dat +196 -0
- data/testdata/validator/attributes.test +1035 -0
- data/testdata/validator/base-href-attribute.test +787 -0
- data/testdata/validator/base-target-attribute.test +35 -0
- data/testdata/validator/blockquote-cite-attribute.test +7 -0
- data/testdata/validator/classattribute.test +152 -0
- data/testdata/validator/contenteditableattribute.test +59 -0
- data/testdata/validator/contextmenuattribute.test +115 -0
- data/testdata/validator/dirattribute.test +59 -0
- data/testdata/validator/draggableattribute.test +63 -0
- data/testdata/validator/html-xmlns-attribute.test +23 -0
- data/testdata/validator/idattribute.test +115 -0
- data/testdata/validator/inputattributes.test +2795 -0
- data/testdata/validator/irrelevantattribute.test +63 -0
- data/testdata/validator/langattribute.test +5579 -0
- data/testdata/validator/li-value-attribute.test +7 -0
- data/testdata/validator/link-href-attribute.test +7 -0
- data/testdata/validator/link-hreflang-attribute.test +7 -0
- data/testdata/validator/link-rel-attribute.test +271 -0
- data/testdata/validator/ol-start-attribute.test +7 -0
- data/testdata/validator/starttags.test +375 -0
- data/testdata/validator/style-scoped-attribute.test +7 -0
- data/testdata/validator/tabindexattribute.test +79 -0
- data/tests/preamble.rb +7 -17
- data/tests/test_encoding.rb +1 -1
- data/tests/test_lxp.rb +16 -0
- data/tests/test_parser.rb +2 -2
- data/tests/test_sniffer.rb +27 -0
- data/tests/test_treewalkers.rb +41 -22
- data/tests/test_validator.rb +31 -0
- metadata +65 -6
@@ -0,0 +1,30 @@
|
|
1
|
+
# adapted from feedvalidator, original copyright license is
|
2
|
+
#
|
3
|
+
# Copyright (c) 2002-2006, Sam Ruby, Mark Pilgrim, Joseph Walton, and Phil Ringnalda
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in all
|
13
|
+
# copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
# SOFTWARE.
|
22
|
+
|
23
|
+
|
24
|
+
# mime_re = Regexp.new('[^\s()<>,;:\\"/[\]?=]+/[^\s()<>,;:\\"/[\]?=]+(\s*;\s*[^\s()<>,;:\\"/[\]?=]+=("(\\"|[^"])*"|[^\s()<>,;:\\"/[\]?=]+))*$')
|
25
|
+
|
26
|
+
# Reports whether +value+ is an acceptable MIME type.
#
# The RFC 2046 pattern check is currently disabled (the original call was
# "!!mime_re.match(value)"), so every value is accepted.
#
# value - the candidate MIME type (ignored for now).
#
# Returns true.
def is_valid_mime_type(value)
  true
end
|
30
|
+
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# adapted from feedvalidator, original copyright license is
|
2
|
+
#
|
3
|
+
# Copyright (c) 2002-2006, Sam Ruby, Mark Pilgrim, Joseph Walton, and Phil Ringnalda
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in all
|
13
|
+
# copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
# SOFTWARE.
|
22
|
+
|
23
|
+
# URI schemes registered with IANA, see
# http://www.iana.org/assignments/uri-schemes.html
iana_schemes = [
  "ftp", "http", "gopher", "mailto", "news", "nntp", "telnet", "wais",
  "file", "prospero", "z39.50s", "z39.50r", "cid", "mid", "vemmi",
  "service", "imap", "nfs", "acap", "rtsp", "tip", "pop", "data", "dav",
  "opaquelocktoken", "sip", "sips", "tel", "fax", "modem", "ldap",
  "https", "soap.beep", "soap.beeps", "xmlrpc.beep", "xmlrpc.beeps",
  "urn", "go", "h323", "ipp", "tftp", "mupdate", "pres", "im", "mtqp",
  "iris.beep", "dict", "snmp", "crid", "tag", "dns", "info"
]

# Schemes the validator accepts: the IANA list plus 'javascript'.
ALLOWED_SCHEMES = iana_schemes + ['javascript']

# Generic URI reference: optional scheme, up to two slashes, then any run of
# URI characters.
RFC2396 = Regexp.new("^([a-zA-Z][0-9a-zA-Z+\\-\\.]*:)?/{0,2}[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%,#]*$", Regexp::MULTILINE)

# Like RFC2396 but with a mandatory scheme. This is a top-level local
# variable, so it is not visible from inside method bodies.
rfc2396_full = Regexp.new("[a-zA-Z][0-9a-zA-Z+\\-\\.]*:(//)?[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%,#]+$")

# NOTE: these patterns are built from double-quoted strings, where an unknown
# escape such as "\d" silently collapses to the bare character ("d"). Every
# backslash meant for the regexp engine must therefore be doubled.
URN = Regexp.new("^[Uu][Rr][Nn]:[a-zA-Z0-9][a-zA-Z0-9-]{1,31}:([a-zA-Z0-9()+,\\.:=@;$_!*'\\-]|%[0-9A-Fa-f]{2})+$")

# tag: URIs -- "tag:" authority "," date ":" specific [ "#" fragment ].
# The date is a four-digit year optionally followed by -MM and -DD.
TAG = Regexp.new("^tag:([a-z0-9\\-\\._]+?@)?[a-z0-9\\.\\-]+?,\\d{4}(-\\d{2}(-\\d{2})?)?:[0-9a-zA-Z;/\\?:@&=+$\\.\\-_!~*'\\(\\)%,]*(#[0-9a-zA-Z;/\\?:@&=+$\\.\\-_!~*'\\(\\)%,]*)?$")
|
38
|
+
|
39
|
+
# Validates +value+ as a URI reference.
#
# value       - String to check.
# uri_pattern - Regexp that must match the whole of +value+ (default RFC2396).
#
# Returns a two-element Array [valid, error_code]. error_code is "" when the
# value is accepted, otherwise one of "invalid-tag-uri", "invalid-urn",
# "invalid-uri-char", "uri-not-iri", "invalid-uri",
# "invalid-http-or-ftp-uri" or "invalid-scheme".
def is_valid_uri(value, uri_pattern = RFC2396)
  # split yields no elements for "" and for values made only of colons
  # (":" or "::"), so scheme can be nil and must be guarded below.
  scheme = value.split(':').first
  scheme.downcase! if scheme
  match = uri_pattern.match(value)

  if scheme == 'tag'
    return false, "invalid-tag-uri" if !TAG.match(value)
  elsif scheme == "urn"
    return false, "invalid-urn" if !URN.match(value)
  elsif match.to_a.reject{|i| i == ''}.compact.length == 0 || match[0] != value
    # The pattern did not match the complete value (or matched nothing).
    urichars = Regexp.new("^[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%,#]$", Regexp::MULTILINE)
    if value.length > 0
      # Only ASCII bytes are checked; bytes >= 128 are let through.
      value.each_byte do |b|
        if b < 128 && !urichars.match([b].pack('c*'))
          return false, "invalid-uri-char"
        end
      end
      # NOTE: a non-empty value whose ASCII characters are all legal falls
      # through here and is accepted by the final return. Raw non-ASCII IRIs
      # pass validation this way, so the behaviour is kept as is.
    else
      begin
        if uri_pattern.match(value.encode('idna'))
          return false, "uri-not-iri"
        end
      rescue
        # Best effort: if the 'idna' conversion raises, report a plain
        # invalid URI below.
      end
      return false, "invalid-uri"
    end
  elsif ['http','ftp'].include?(scheme)
    # http and ftp URIs need "//" followed by a non-empty authority.
    if !value.match(%r{^\w+://[^/].*})
      return false, "invalid-http-or-ftp-uri"
    end
  elsif value.index(':') && scheme && scheme.match(/^[a-z]+$/) && !ALLOWED_SCHEMES.include?(scheme)
    return false, "invalid-scheme"
  end
  return true, ""
end
|
76
|
+
|
77
|
+
# Validates +value+ as an IRI.
#
# A non-empty value is first passed through String#encode('idna'). If that
# step raises for any reason, the value is used unchanged. The result is then
# checked with is_valid_uri using its default pattern.
#
# Returns whatever is_valid_uri returns: [valid, error_code].
def is_valid_iri(value)
  candidate =
    begin
      value.length > 0 ? value.encode('idna') : value
    rescue
      value
    end
  is_valid_uri(candidate)
end
|
86
|
+
|
87
|
+
# Validates +value+ with is_valid_uri, using a pattern whose scheme part is
# mandatory instead of the default pattern.
#
# The pattern is built here because a top-level local variable cannot be
# read from inside a method body (doing so raises NameError).
#
# Returns whatever is_valid_uri returns: [valid, error_code].
def is_valid_fully_qualified_uri(value)
  full_pattern = Regexp.new("[a-zA-Z][0-9a-zA-Z+\\-\\.]*:(//)?[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%,#]+$")
  is_valid_uri(value, full_pattern)
end
|
@@ -0,0 +1,830 @@
|
|
1
|
+
# HTML 5 conformance checker
|
2
|
+
#
|
3
|
+
# Warning: this module is experimental, incomplete, and subject to removal at any time.
|
4
|
+
#
|
5
|
+
# Usage:
|
6
|
+
# >>> from html5lib.html5parser import HTMLParser
|
7
|
+
# >>> from html5lib.filters.validator import HTMLConformanceChecker
|
8
|
+
# >>> p = HTMLParser(tokenizer=HTMLConformanceChecker)
|
9
|
+
# >>> p.parse('<!doctype html>\n<html foo=bar></html>')
|
10
|
+
# <<class 'html5lib.treebuilders.simpletree.Document'> nil>
|
11
|
+
# >>> p.errors
|
12
|
+
# [((2, 14), 'unknown-attribute', {'attributeName' => u'foo', 'tagName' => u'html'})]
|
13
|
+
|
14
|
+
require 'html5/constants'
|
15
|
+
require 'html5/filters/base'
|
16
|
+
require 'html5/filters/iso639codes'
|
17
|
+
require 'html5/filters/rfc3987'
|
18
|
+
require 'html5/filters/rfc2046'
|
19
|
+
|
20
|
+
# Identity wrapper around message strings; returns +str+ unchanged.
def _(str)
  str
end
|
21
|
+
|
22
|
+
class String
  # Converts a CamelCase / dashed name to snake_case (lifted from Rails).
  #
  # "::" becomes "/", a "_" is inserted at lower-to-upper and acronym
  # boundaries, "-" becomes "_", and the result is lower-cased.
  #
  # Returns a new String; the receiver is not modified.
  def underscore()
    snake = gsub(/::/, '/')
    snake = snake.gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
    snake = snake.gsub(/([a-z\d])([A-Z])/, '\1_\2')
    snake.tr("-", "_").downcase
  end
end
|
32
|
+
|
33
|
+
# Registers the conformance checker's error messages in HTML5::E, keyed by
# error code. Placeholders are written %(name) and name an entry of the
# :datavars hash carried by the matching ParseError token.
HTML5::E.update({
  "unknown-start-tag" =>
    _("Unknown start tag <%(tagName)>."),
  "unknown-attribute" =>
    _("Unknown '%(attributeName)' attribute on <%(tagName)>."),
  "missing-required-attribute" =>
    _("The '%(attributeName)' attribute is required on <%(tagName)>."),
  "unknown-input-type" =>
    _("Illegal value for attribute on <input type='%(inputType)'>."),
  "attribute-not-allowed-on-this-input-type" =>
    _("The '%(attributeName)' attribute is not allowed on <input type=%(inputType)>."),
  "deprecated-attribute" =>
    _("This attribute is deprecated: '%(attributeName)' attribute on <%(tagName)>."),
  "duplicate-value-in-token-list" =>
    _("Duplicate value in token list: '%(attributeValue)' in '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-attribute-value" =>
    _("Invalid attribute value: '%(attributeName)' attribute on <%(tagName)>."),
  "space-in-id" =>
    _("Whitespace is not allowed here: '%(attributeName)' attribute on <%(tagName)>."),
  "duplicate-id" =>
    _("This ID was already defined earlier: 'id' attribute on <%(tagName)>."),
  "attribute-value-can-not-be-blank" =>
    _("This value can not be blank: '%(attributeName)' attribute on <%(tagName)>."),
  "id-does-not-exist" =>
    _("This value refers to a non-existent ID: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-enumerated-value" =>
    _("Value must be one of %(enumeratedValues): '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-boolean-value" =>
    _("Value must be one of %(enumeratedValues): '%(attributeName)' attribute on <%(tagName)>."),
  "contextmenu-must-point-to-menu" =>
    _("The contextmenu attribute must point to an ID defined on a <menu> element."),
  "invalid-lang-code" =>
    _("Invalid language code: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-integer-value" =>
    _("Value must be an integer: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-root-namespace" =>
    _("Root namespace must be 'http://www.w3.org/1999/xhtml', or omitted."),
  "invalid-browsing-context" =>
    _("Value must be one of ('_self', '_parent', '_top'), or a name that does not start with '_': '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-tag-uri" =>
    _("Invalid URI: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-urn" =>
    _("Invalid URN: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-uri-char" =>
    _("Illegal character in URI: '%(attributeName)' attribute on <%(tagName)>."),
  "uri-not-iri" =>
    _("Expected a URI but found an IRI: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-uri" =>
    _("Invalid URI: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-http-or-ftp-uri" =>
    _("Invalid URI: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-scheme" =>
    _("Unregistered URI scheme: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-rel" =>
    _("Invalid link relation: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-mime-type" =>
    _("Invalid MIME type: '%(attributeName)' attribute on <%(tagName)>."),
})
|
91
|
+
|
92
|
+
|
93
|
+
class HTMLConformanceChecker < HTML5::Filters::Base
|
94
|
+
|
95
|
+
@@global_attributes = %w[class contenteditable contextmenu dir
|
96
|
+
draggable id irrelevant lang ref tabindex template
|
97
|
+
title onabort onbeforeunload onblur onchange onclick
|
98
|
+
oncontextmenu ondblclick ondrag ondragend ondragenter
|
99
|
+
ondragleave ondragover ondragstart ondrop onerror
|
100
|
+
onfocus onkeydown onkeypress onkeyup onload onmessage
|
101
|
+
onmousedown onmousemove onmouseout onmouseover onmouseup
|
102
|
+
onmousewheel onresize onscroll onselect onsubmit onunload]
|
103
|
+
# XXX lang in HTML only, xml:lang in XHTML only
|
104
|
+
# XXX validate ref, template
|
105
|
+
|
106
|
+
@@allowed_attribute_map = {
|
107
|
+
'html' => %w[xmlns],
|
108
|
+
'head' => [],
|
109
|
+
'title' => [],
|
110
|
+
'base' => %w[href target],
|
111
|
+
'link' => %w[href rel media hreflang type],
|
112
|
+
'meta' => %w[name http-equiv content charset], # XXX charset in HTML only
|
113
|
+
'style' => %w[media type scoped],
|
114
|
+
'body' => [],
|
115
|
+
'section' => [],
|
116
|
+
'nav' => [],
|
117
|
+
'article' => [],
|
118
|
+
'blockquote' => %w[cite],
|
119
|
+
'aside' => [],
|
120
|
+
'h1' => [],
|
121
|
+
'h2' => [],
|
122
|
+
'h3' => [],
|
123
|
+
'h4' => [],
|
124
|
+
'h5' => [],
|
125
|
+
'h6' => [],
|
126
|
+
'header' => [],
|
127
|
+
'footer' => [],
|
128
|
+
'address' => [],
|
129
|
+
'p' => [],
|
130
|
+
'hr' => [],
|
131
|
+
'br' => [],
|
132
|
+
'dialog' => [],
|
133
|
+
'pre' => [],
|
134
|
+
'ol' => %w[start],
|
135
|
+
'ul' => [],
|
136
|
+
'li' => %w[value], # XXX depends on parent
|
137
|
+
'dl' => [],
|
138
|
+
'dt' => [],
|
139
|
+
'dd' => [],
|
140
|
+
'a' => %w[href target ping rel media hreflang type],
|
141
|
+
'q' => %w[cite],
|
142
|
+
'cite' => [],
|
143
|
+
'em' => [],
|
144
|
+
'strong' => [],
|
145
|
+
'small' => [],
|
146
|
+
'm' => [],
|
147
|
+
'dfn' => [],
|
148
|
+
'abbr' => [],
|
149
|
+
'time' => %w[datetime],
|
150
|
+
'meter' => %w[value min low high max optimum],
|
151
|
+
'progress' => %w[value max],
|
152
|
+
'code' => [],
|
153
|
+
'var' => [],
|
154
|
+
'samp' => [],
|
155
|
+
'kbd' => [],
|
156
|
+
'sup' => [],
|
157
|
+
'sub' => [],
|
158
|
+
'span' => [],
|
159
|
+
'i' => [],
|
160
|
+
'b' => [],
|
161
|
+
'bdo' => [],
|
162
|
+
'ins' => %w[cite datetime],
|
163
|
+
'del' => %w[cite datetime],
|
164
|
+
'figure' => [],
|
165
|
+
'img' => %w[alt src usemap ismap height width], # XXX ismap depends on parent
|
166
|
+
'iframe' => %w[src],
|
167
|
+
# <embed> handled separately
|
168
|
+
'object' => %w[data type usemap height width],
|
169
|
+
'param' => %w[name value],
|
170
|
+
'video' => %w[src autoplay start loopstart loopend end loopcount controls],
|
171
|
+
'audio' => %w[src autoplay start loopstart loopend end loopcount controls],
|
172
|
+
'source' => %w[src type media],
|
173
|
+
'canvas' => %w[height width],
|
174
|
+
'map' => [],
|
175
|
+
'area' => %w[alt coords shape href target ping rel media hreflang type],
|
176
|
+
'table' => [],
|
177
|
+
'caption' => [],
|
178
|
+
'colgroup' => %w[span], # XXX only if element contains no <col> elements
|
179
|
+
'col' => %w[span],
|
180
|
+
'tbody' => [],
|
181
|
+
'thead' => [],
|
182
|
+
'tfoot' => [],
|
183
|
+
'tr' => [],
|
184
|
+
'td' => %w[colspan rowspan],
|
185
|
+
'th' => %w[colspan rowspan scope],
|
186
|
+
# all possible <input> attributes are listed here but <input> is really handled separately
|
187
|
+
'input' => %w[accept accesskey action alt autocomplete autofocus checked
|
188
|
+
disabled enctype form inputmode list maxlength method min
|
189
|
+
max name pattern step readonly replace required size src
|
190
|
+
tabindex target template value
|
191
|
+
],
|
192
|
+
'form' => %w[action method enctype accept name onsubmit onreset accept-charset
|
193
|
+
data replace
|
194
|
+
],
|
195
|
+
'button' => %w[action enctype method replace template name value type disabled form autofocus], # XXX may need matrix of acceptable attributes based on value of type attribute (like input)
|
196
|
+
'select' => %w[name size multiple disabled data accesskey form autofocus],
|
197
|
+
'optgroup' => %w[disabled label],
|
198
|
+
'option' => %w[selected disabled label value],
|
199
|
+
'textarea' => %w[maxlength name rows cols disabled readonly required form autofocus wrap accept],
|
200
|
+
'label' => %w[for accesskey form],
|
201
|
+
'fieldset' => %w[disabled form],
|
202
|
+
'output' => %w[form name for onforminput onformchange],
|
203
|
+
'datalist' => %w[data],
|
204
|
+
# XXX repetition model for repeating form controls
|
205
|
+
'script' => %w[src defer async type],
|
206
|
+
'noscript' => [],
|
207
|
+
'noembed' => [],
|
208
|
+
'event-source' => %w[src],
|
209
|
+
'details' => %w[open],
|
210
|
+
'datagrid' => %w[multiple disabled],
|
211
|
+
'command' => %w[type label icon hidden disabled checked radiogroup default],
|
212
|
+
'menu' => %w[type label autosubmit],
|
213
|
+
'datatemplate' => [],
|
214
|
+
'rule' => [],
|
215
|
+
'nest' => [],
|
216
|
+
'legend' => [],
|
217
|
+
'div' => [],
|
218
|
+
'font' => %w[style]
|
219
|
+
}
|
220
|
+
|
221
|
+
@@required_attribute_map = {
|
222
|
+
'link' => %w[href rel],
|
223
|
+
'bdo' => %w[dir],
|
224
|
+
'img' => %w[src],
|
225
|
+
'embed' => %w[src],
|
226
|
+
'object' => [], # XXX one of 'data' or 'type' is required
|
227
|
+
'param' => %w[name value],
|
228
|
+
'source' => %w[src],
|
229
|
+
'map' => %w[id]
|
230
|
+
}
|
231
|
+
|
232
|
+
@@input_type_allowed_attribute_map = {
|
233
|
+
'text' => %w[accesskey autocomplete autofocus disabled form inputmode list maxlength name pattern readonly required size tabindex value],
|
234
|
+
'password' => %w[accesskey autocomplete autofocus disabled form inputmode maxlength name pattern readonly required size tabindex value],
|
235
|
+
'checkbox' => %w[accesskey autofocus checked disabled form name required tabindex value],
|
236
|
+
'radio' => %w[accesskey autofocus checked disabled form name required tabindex value],
|
237
|
+
'button' => %w[accesskey autofocus disabled form name tabindex value],
|
238
|
+
'submit' => %w[accesskey action autofocus disabled enctype form method name replace tabindex target value],
|
239
|
+
'reset' => %w[accesskey autofocus disabled form name tabindex value],
|
240
|
+
'add' => %w[accesskey autofocus disabled form name tabindex template value],
|
241
|
+
'remove' => %w[accesskey autofocus disabled form name tabindex value],
|
242
|
+
'move-up' => %w[accesskey autofocus disabled form name tabindex value],
|
243
|
+
'move-down' => %w[accesskey autofocus disabled form name tabindex value],
|
244
|
+
'file' => %w[accept accesskey autofocus disabled form min max name required tabindex],
|
245
|
+
'hidden' => %w[disabled form name value],
|
246
|
+
'image' => %w[accesskey action alt autofocus disabled enctype form method name replace src tabindex target],
|
247
|
+
'datetime' => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
|
248
|
+
'datetime-local' => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
|
249
|
+
'date' => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
|
250
|
+
'month' => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
|
251
|
+
'week' => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
|
252
|
+
'time' => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
|
253
|
+
'number' => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
|
254
|
+
'range' => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
|
255
|
+
'email' => %w[accesskey autocomplete autofocus disabled form inputmode list maxlength name pattern readonly required tabindex value],
|
256
|
+
'url' => %w[accesskey autocomplete autofocus disabled form inputmode list maxlength name pattern readonly required tabindex value],
|
257
|
+
}
|
258
|
+
|
259
|
+
@@input_type_deprecated_attribute_map = {
|
260
|
+
'text' => ['size'],
|
261
|
+
'password' => ['size']
|
262
|
+
}
|
263
|
+
|
264
|
+
@@link_rel_values = %w[alternate archive archives author contact feed first begin start help icon index top contents toc last end license copyright next pingback prefetch prev previous search stylesheet sidebar tag up]
|
265
|
+
@@a_rel_values = %w[alternate archive archives author contact feed first begin start help index top contents toc last end license copyright next prev previous search sidebar tag up bookmark external nofollow]
|
266
|
+
|
267
|
+
# Wraps an HTML5::HTMLTokenizer built from +stream+ and +args+, and starts
# with empty ID bookkeeping lists.
#
# stream - input handed to HTML5::HTMLTokenizer.new.
# args   - extra arguments forwarded to HTML5::HTMLTokenizer.new.
def initialize(stream, *args)
  tokenizer = HTML5::HTMLTokenizer.new(stream, *args)
  super(tokenizer)
  @things_that_define_an_id, @things_that_point_to_an_id, @ids_we_have_known_and_loved = [], [], []
end
|
273
|
+
|
274
|
+
# Iterates over the wrapped token stream, yielding validation errors followed
# by each original token.
#
# For every token the most specific handler this object responds to is used:
# "validate_<type>_<name>" first, then "validate_<type>" (type and name are
# underscored; a missing key counts as '-'). Tokens yielded by the handler
# are passed on before the token itself. After the stream is exhausted,
# whatever eof yields is passed on as well.
def each
  __getobj__.each do |token|
    type_part = token.fetch(:type, '-').to_s.underscore
    name_part = token.fetch(:name, '-').to_s.underscore
    candidates = ["validate_#{type_part}_#{name_part}", "validate_#{type_part}"]
    handler = candidates.find { |candidate| respond_to?(candidate) }
    send(handler, token) { |t| yield t } if handler
    yield token
  end
  eof { |t| yield t }
end
|
293
|
+
|
294
|
+
##########################################################################
|
295
|
+
# Start tag validation
|
296
|
+
##########################################################################
|
297
|
+
|
298
|
+
# Runs the generic start-tag checks on +token+ in a fixed order: unknown tag,
# required attributes, unknown attributes, attribute values. Every error
# token a check produces is yielded to the caller's block.
def validate_start_tag(token)
  checks = [:check_unknown_start_tag,
            :check_start_tag_required_attributes,
            :check_start_tag_unknown_attributes,
            :check_attribute_values]
  checks.each do |check|
    send(check, token) { |t| yield t }
  end
end
|
310
|
+
|
311
|
+
# Start-tag checks for <embed>: required attributes, then attribute values.
#
# The spec says "any attributes w/o namespace" are allowed on <embed>, so
# check_start_tag_unknown_attributes is deliberately not called.
def validate_start_tag_embed(token)
  [:check_start_tag_required_attributes, :check_attribute_values].each do |check|
    send(check, token) { |t| yield t }
  end
end
|
321
|
+
|
322
|
+
# Start-tag checks for <input>, whose allowed attributes depend on the value
# of its type attribute (default "text").
#
# Yields ParseError tokens for:
#   unknown-input-type                       - type is not a known input type
#   unknown-attribute                        - attribute never valid on <input>
#   attribute-not-allowed-on-this-input-type - valid on <input>, not this type
#   deprecated-attribute                     - attribute deprecated for this type
def validate_start_tag_input(token)
  check_attribute_values(token) do |t|
    yield t
  end

  # Attribute names are compared case-insensitively. A token without :data
  # is treated as having no attributes.
  attr_dict = {}
  (token[:data] || []).each do |name, value|
    attr_dict[name.downcase] = value
  end

  input_type = attr_dict.fetch('type', "text")
  if !@@input_type_allowed_attribute_map.has_key?(input_type)
    # "inputType" is the placeholder the message uses; :attrValue is kept
    # for callers that already read it.
    yield({:type => "ParseError",
           :data => "unknown-input-type",
           :datavars => {:attrValue => input_type,
                         "inputType" => input_type}})
  end

  allowed_attributes = @@input_type_allowed_attribute_map.fetch(input_type, [])
  deprecated_attributes = @@input_type_deprecated_attribute_map.fetch(input_type, [])
  attr_dict.each do |attr_name, attr_value|
    if !@@allowed_attribute_map['input'].include?(attr_name)
      yield({:type => "ParseError",
             :data => "unknown-attribute",
             :datavars => {"tagName" => "input",
                           "attributeName" => attr_name}})
    elsif !allowed_attributes.include?(attr_name)
      yield({:type => "ParseError",
             :data => "attribute-not-allowed-on-this-input-type",
             :datavars => {"attributeName" => attr_name,
                           "inputType" => input_type}})
    end
    if deprecated_attributes.include?(attr_name)
      # The deprecated-attribute message refers to %(tagName), so supply it.
      yield({:type => "ParseError",
             :data => "deprecated-attribute",
             :datavars => {"tagName" => "input",
                           "attributeName" => attr_name,
                           "inputType" => input_type}})
    end
  end
end
|
354
|
+
|
355
|
+
##########################################################################
|
356
|
+
# Start tag validation helpers
|
357
|
+
##########################################################################
|
358
|
+
|
359
|
+
# Yields an "unknown-start-tag" ParseError when the token's lower-cased name
# is not a key of the allowed-attribute map. A missing name is treated as "".
def check_unknown_start_tag(token)
  tag = (token[:name] || "").downcase
  return if @@allowed_attribute_map.has_key?(tag)
  yield({:type => "ParseError",
         :data => "unknown-start-tag",
         :datavars => {"tagName" => tag}})
end
|
368
|
+
|
369
|
+
# Yields a "missing-required-attribute" ParseError for every attribute that
# the required-attribute map lists for this tag but the token does not carry.
# Tags without an entry in the map are not checked.
def check_start_tag_required_attributes(token)
  tag = (token[:name] || "").downcase
  return unless @@required_attribute_map.has_key?(tag)
  present = (token[:data] || []).collect { |pair| pair[0] }
  @@required_attribute_map[tag].each do |required|
    next if present.include?(required)
    yield({:type => "ParseError",
           :data => "missing-required-attribute",
           :datavars => {"tagName" => tag,
                         "attributeName" => required}})
  end
end
|
384
|
+
|
385
|
+
# Yields an "unknown-attribute" ParseError for every attribute on the token
# that is neither a global attribute nor listed for this tag in the
# allowed-attribute map. Attribute names are compared lower-cased.
#
# A missing name or missing attribute list is tolerated, as in the other
# start-tag checks.
def check_start_tag_unknown_attributes(token)
  name = (token[:name] || "").downcase
  allowed_attributes = @@global_attributes | @@allowed_attribute_map.fetch(name, [])
  (token[:data] || []).each do |attr_name, attr_value|
    next if allowed_attributes.include?(attr_name.downcase)
    yield({:type => "ParseError",
           :data => "unknown-attribute",
           :datavars => {"tagName" => name,
                         "attributeName" => attr_name}})
  end
end
|
398
|
+
|
399
|
+
##########################################################################
|
400
|
+
# Attribute validation helpers
|
401
|
+
##########################################################################
|
402
|
+
|
403
|
+
# def checkURI(token, tag_name, attr_name, attr_value)
|
404
|
+
# is_valid, error_code = rfc3987.is_valid_uri(attr_value)
|
405
|
+
# if not is_valid
|
406
|
+
# yield {:type => "ParseError",
|
407
|
+
# :data => error_code,
|
408
|
+
# :datavars => {"tagName" => tag_name,
|
409
|
+
# "attributeName" => attr_name}}
|
410
|
+
# yield {:type => "ParseError",
|
411
|
+
# :data => "invalid-attribute-value",
|
412
|
+
# :datavars => {"tagName" => tag_name,
|
413
|
+
# "attributeName" => attr_name}}
|
414
|
+
|
415
|
+
# Checks that +attr_value+ is a valid IRI according to is_valid_iri.
#
# When it is not, two ParseError tokens are yielded: one carrying the
# specific error code returned by is_valid_iri, followed by a generic
# "invalid-attribute-value".
def check_iri(token, tag_name, attr_name, attr_value)
  valid, code = is_valid_iri(attr_value)
  return if valid
  yield({:type => "ParseError",
         :data => code,
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name}})
  yield({:type => "ParseError",
         :data => "invalid-attribute-value",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name}})
end
|
428
|
+
|
429
|
+
# Checks an ID-valued attribute.
#
# Yields "attribute-value-can-not-be-blank" when the value is nil or empty.
# Otherwise, if the value contains any of HTML5::SPACE_CHARACTERS, yields
# "space-in-id" followed by "invalid-attribute-value" (once, for the first
# offending character).
def check_id(token, tag_name, attr_name, attr_value)
  if !attr_value || attr_value.length == 0
    yield({:type => "ParseError",
           :data => "attribute-value-can-not-be-blank",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
    # Nothing left to scan; continuing would call each_byte on nil.
    return
  end
  attr_value.each_byte do |b|
    c = [b].pack('c*')
    if HTML5::SPACE_CHARACTERS.include?(c)
      yield({:type => "ParseError",
             :data => "space-in-id",
             :datavars => {"tagName" => tag_name,
                           "attributeName" => attr_name}})
      yield({:type => "ParseError",
             :data => "invalid-attribute-value",
             :datavars => {"tagName" => tag_name,
                           "attributeName" => attr_name}})
      break
    end
  end
end
|
451
|
+
|
452
|
+
# Splits +value+ into the tokens separated by runs of
# HTML5::SPACE_CHARACTERS, scanning byte by byte.
#
# Returns an Array of the non-empty tokens in order of appearance.
def parse_token_list(value)
  tokens = []
  pending = ''
  # A trailing space is appended so the last token is flushed by the loop.
  (value + ' ').each_byte do |byte|
    char = [byte].pack('c*')
    unless HTML5::SPACE_CHARACTERS.include?(char)
      pending += char
      next
    end
    next if pending.length == 0
    tokens << pending
    pending = ''
  end
  tokens << pending if pending.length > 0
  tokens
end
|
471
|
+
|
472
|
+
# Checks a space-separated token list for repeated entries.
#
# "Token" here means a token inside an attribute value, i.e.
# http://www.whatwg.org/specs/web-apps/current-work/#set-of
# and not a token produced by the HTML tokenizer.
#
# Yields a single "duplicate-value-in-token-list" ParseError for the first
# repeated value found; later duplicates are not reported.
def check_token_list(tag_name, attr_name, attr_value)
  seen = {}
  repeated = parse_token_list(attr_value).find do |item|
    known = seen.has_key?(item)
    seen[item] = 1
    known
  end
  return unless repeated

  yield({:type => "ParseError",
         :data => "duplicate-value-in-token-list",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name,
                       "attributeValue" => repeated}})
end
|
491
|
+
|
492
|
+
# Checks attr_value against a fixed list of allowed (lower-case) values.
#
# The comparison is case-insensitive and does not modify attr_value.
# Yields ParseError hashes to the caller's block:
# * "attribute-value-can-not-be-blank" when the value is nil or empty and
#   the empty string is not itself listed in enumerated_values;
# * "invalid-enumerated-value" followed by "invalid-attribute-value" when
#   the value is not in enumerated_values.
# The token argument is not used here.
def check_enumerated_value(token, tag_name, attr_name, attr_value, enumerated_values)
  if !attr_value || attr_value.length == 0
    # A caller that lists '' (e.g. contenteditable) explicitly allows blank.
    return if enumerated_values.include?('')

    yield({:type => "ParseError",
           :data => "attribute-value-can-not-be-blank",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
    return
  end

  # Non-destructive downcase: the string belongs to the caller's token.
  normalized = attr_value.downcase
  return if enumerated_values.include?(normalized)

  yield({:type => "ParseError",
         :data => "invalid-enumerated-value",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name,
                       "enumeratedValues" => enumerated_values}})
  yield({:type => "ParseError",
         :data => "invalid-attribute-value",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name}})
end
|
513
|
+
|
514
|
+
# Checks a boolean attribute: the only accepted values are the attribute's
# own name and the empty string.
#
# For any other value (including nil) yields "invalid-boolean-value" followed
# by "invalid-attribute-value" to the caller's block. The token argument is
# not used here.
def check_boolean(token, tag_name, attr_name, attr_value)
  accepted = [attr_name, '']
  return if accepted.include?(attr_value)

  yield({:type => "ParseError",
         :data => "invalid-boolean-value",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name,
                       "enumeratedValues" => accepted}})
  yield({:type => "ParseError",
         :data => "invalid-attribute-value",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name}})
end
|
528
|
+
|
529
|
+
# Checks that attr_value starts with a valid integer.
#
# Accepted shape: optional leading HTML5::SPACE_CHARACTERS, an optional '-',
# then one or more HTML5::DIGITS. Anything after the digits is tolerated
# ("trailing junk") and not reported.
#
# Yields ParseError hashes to the caller's block:
# * "invalid-integer-value" when a non-digit appears where a digit (or sign)
#   is required;
# * "attribute-value-can-not-be-blank" when no digits were found at all
#   (nil, empty, whitespace-only or a lone '-').
# The token argument is not used here.
def check_integer(token, tag_name, attr_name, attr_value)
  number_string = ''
  state = 'begin' # one of 'begin', 'initial-number', 'in-number', 'trailing-junk'
  error = {:type => "ParseError",
           :data => "invalid-integer-value",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name,
                         "attributeValue" => attr_value}}

  # to_s keeps nil from crashing; /m makes '.' match newlines so they are
  # examined like every other character instead of being skipped.
  attr_value.to_s.scan(/./m) do |c|
    case state
    when 'begin'
      if HTML5::SPACE_CHARACTERS.include?(c)
        next
      elsif c == '-'
        state = 'initial-number'
      elsif HTML5::DIGITS.include?(c)
        number_string += c
        state = 'in-number'
      else
        yield error
        return
      end
    when 'initial-number'
      # A sign must be followed immediately by a digit.
      if !HTML5::DIGITS.include?(c)
        yield error
        return
      end
      number_string += c
      state = 'in-number'
    when 'in-number'
      if HTML5::DIGITS.include?(c)
        number_string += c
      else
        state = 'trailing-junk'
      end
    end
    # 'trailing-junk': remaining characters are ignored.
  end

  if number_string.length == 0
    yield({:type => "ParseError",
           :data => "attribute-value-can-not-be-blank",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
  end
end
|
576
|
+
|
577
|
+
# Placeholder for floating-point attribute validation.
# XXX not implemented: every value is accepted and nothing is yielded.
def check_floating_point_number(token, tag_name, attr_name, attr_value)
  nil
end
|
580
|
+
|
581
|
+
# Checks a browsing-context name (e.g. a target attribute).
#
# Names that do not start with '_' are accepted as-is, as is nil. A name
# starting with '_' must be one of _self, _parent, _top or _blank
# (case-insensitive); otherwise "invalid-browsing-context" is yielded to the
# caller's block. attr_value is never modified. The token argument is not
# used here.
def check_browsing_context(token, tag_name, attr_name, attr_value)
  return if not attr_value
  # [0, 1] yields a one-character string on every Ruby version.
  return if attr_value[0, 1] != '_'
  # Non-destructive downcase: the string belongs to the caller's token.
  return if ['_self', '_parent', '_top', '_blank'].include?(attr_value.downcase)

  yield({:type => "ParseError",
         :data => "invalid-browsing-context",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name}})
end
|
591
|
+
|
592
|
+
# Checks a language-code attribute value.
#
# A nil or empty value is accepted without consulting is_valid_lang_code.
# Any other value rejected by is_valid_lang_code causes one
# "invalid-lang-code" ParseError to be yielded to the caller's block.
# The token argument is not used here.
def check_lang_code(token, tag_name, attr_name, attr_value)
  blank = !attr_value || attr_value == '' # blank is OK
  unless blank || is_valid_lang_code(attr_value)
    yield({:type => "ParseError",
           :data => "invalid-lang-code",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name,
                         "attributeValue" => attr_value}})
  end
end
|
602
|
+
|
603
|
+
# Checks a MIME-type attribute value.
#
# Yields ParseError hashes to the caller's block:
# * "attribute-value-can-not-be-blank" when the value is nil (and nothing
#   else, since there is no value to inspect);
# * "invalid-mime-type" when is_valid_mime_type rejects the value.
# The token argument is not used here.
def check_mime_type(token, tag_name, attr_name, attr_value)
  # XXX needs tests
  if not attr_value
    yield({:type => "ParseError",
           :data => "attribute-value-can-not-be-blank",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
    # Do not hand nil to is_valid_mime_type or report the value twice.
    return
  end

  if not is_valid_mime_type(attr_value)
    yield({:type => "ParseError",
           :data => "invalid-mime-type",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name,
                         "attributeValue" => attr_value}})
  end
end
|
619
|
+
|
620
|
+
# Placeholder for media-query attribute validation.
# XXX not implemented: every value is accepted and nothing is yielded.
def check_media_query(token, tag_name, attr_name, attr_value)
  nil
end
|
623
|
+
|
624
|
+
# Checks a rel attribute value.
#
# First forwards any error from check_token_list (duplicate tokens), then
# yields one "invalid-rel" ParseError per token that is not in the allowed
# set: @@link_rel_values when tag_name is 'link', @@a_rel_values otherwise.
# The token argument is not used here.
def check_link_relation(token, tag_name, attr_name, attr_value)
  check_token_list(tag_name, attr_name, attr_value) { |error| yield error }

  permitted = (tag_name == 'link') ? @@link_rel_values : @@a_rel_values
  parse_token_list(attr_value).each do |rel|
    next if permitted.include?(rel)

    yield({:type => "ParseError",
           :data => "invalid-rel",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
  end
end
|
639
|
+
|
640
|
+
# Placeholder for date/time attribute validation.
# XXX not implemented: every value is accepted and nothing is yielded.
#
# TODO: the intended implementation is a character-by-character state
# machine, sketched as:
#   state = 'begin' # ('begin', '...
#   for each character c in attr_value
#     if state == 'begin'
#       if SPACE_CHARACTERS.include?(c)
#         next
#       elsif digits.include?(c)
#         state = ...
def check_date_time(token, tag_name, attr_name, attr_value)
  nil
end
|
650
|
+
|
651
|
+
##########################################################################
|
652
|
+
# Attribute validation
|
653
|
+
##########################################################################
|
654
|
+
|
655
|
+
# Dispatches every attribute of a token to its value validator.
#
# For each (name, value) pair in token[:data] the attribute name is
# lower-cased and a validator method is looked up by name, most specific
# first:
#   validate_attribute_value_<tag>_<attr>, then validate_attribute_value_<attr>
# (names are passed through String#underscore). The first one this object
# responds to is invoked with (token, tag_name, attr_name, attr_value) and
# everything it yields is forwarded to the caller's block. Attributes with no
# matching validator are ignored.
def check_attribute_values(token)
  tag_name = token.fetch(:name, "")
  token.fetch(:data, []).each do |raw_name, attr_value|
    attr_name = raw_name.downcase
    attr_key = attr_name.to_s.underscore
    candidates = ["validate_attribute_value_#{tag_name.to_s.underscore}_#{attr_key}",
                  "validate_attribute_value_#{attr_key}"]
    handler = candidates.find { |candidate| respond_to?(candidate) }
    next unless handler

    send(handler, token, tag_name, attr_name, attr_value) { |error| yield error }
  end
end
|
674
|
+
|
675
|
+
# Validates the class attribute as a token list.
#
# Each error reported by check_token_list is forwarded to the caller's block
# and immediately followed by a generic "invalid-attribute-value" ParseError.
# The token argument is not used here.
def validate_attribute_value_class(token, tag_name, attr_name, attr_value)
  check_token_list(tag_name, attr_name, attr_value) do |list_error|
    yield list_error
    generic_error = {:type => "ParseError",
                     :data => "invalid-attribute-value",
                     :datavars => {"tagName" => tag_name,
                                   "attributeName" => attr_name}}
    yield(generic_error)
  end
end
|
684
|
+
|
685
|
+
# Validates contenteditable: delegates to check_enumerated_value with the
# allowed values 'true', 'false' and '' and forwards every error it yields.
def validate_attribute_value_contenteditable(token, tag_name, attr_name, attr_value)
  allowed = ['true', 'false', '']
  check_enumerated_value(token, tag_name, attr_name, attr_value, allowed) { |error| yield error }
end
|
690
|
+
|
691
|
+
# Validates dir: delegates to check_enumerated_value with the allowed values
# 'ltr' and 'rtl' and forwards every error it yields.
def validate_attribute_value_dir(token, tag_name, attr_name, attr_value)
  allowed = ['ltr', 'rtl']
  check_enumerated_value(token, tag_name, attr_name, attr_value, allowed) { |error| yield error }
end
|
696
|
+
|
697
|
+
# Validates draggable: delegates to check_enumerated_value with the allowed
# values 'true' and 'false' and forwards every error it yields.
def validate_attribute_value_draggable(token, tag_name, attr_name, attr_value)
  allowed = ['true', 'false']
  check_enumerated_value(token, tag_name, attr_name, attr_value, allowed) { |error| yield error }
end
|
702
|
+
|
703
|
+
# Global attributes whose validators are plain generic checks.
alias validate_attribute_value_irrelevant check_boolean
alias validate_attribute_value_lang check_lang_code
|
705
|
+
|
706
|
+
# Validates contextmenu as an ID reference.
#
# Forwards every error from check_id, then appends the token to
# @things_that_point_to_an_id (this happens whether or not check_id
# reported anything).
def validate_attribute_value_contextmenu(token, tag_name, attr_name, attr_value)
  check_id(token, tag_name, attr_name, attr_value) { |error| yield error }
  @things_that_point_to_an_id << token
end
|
712
|
+
|
713
|
+
# Validates an id attribute and records it for whole-document checks.
#
# Side effects: unless attr_value is nil, the value is appended to
# @ids_we_have_known_and_loved and the token to @things_that_define_an_id,
# so that later it can be checked 1) whether an ID is duplicated, and
# 2) whether everything that points to something else by ID (like
# <label for> or <span contextmenu>) points to an ID that actually exists.
#
# Yields every error from check_id, plus a "duplicate-id" ParseError when the
# value has already been recorded.
def validate_attribute_value_id(token, tag_name, attr_name, attr_value)
  check_id(token, tag_name, attr_name, attr_value) { |error| yield error }
  return unless attr_value

  already_defined = @ids_we_have_known_and_loved.include?(attr_value)
  if already_defined
    yield({:type => "ParseError",
           :data => "duplicate-id",
           :datavars => {"tagName" => tag_name}})
  end
  @ids_we_have_known_and_loved << attr_value
  @things_that_define_an_id << token
end
|
731
|
+
|
732
|
+
# tabindex is validated as a plain integer.
alias validate_attribute_value_tabindex check_integer
|
733
|
+
|
734
|
+
# Placeholder for validating the ref attribute.
# XXX not implemented: every value is accepted and nothing is yielded.
def validate_attribute_value_ref(token, tag_name, attr_name, attr_value)
  nil
end
|
737
|
+
|
738
|
+
# Placeholder for validating the template attribute.
# XXX not implemented: every value is accepted and nothing is yielded.
def validate_attribute_value_template(token, tag_name, attr_name, attr_value)
  nil
end
|
741
|
+
|
742
|
+
# Validates <html xmlns>: the only accepted value is the XHTML namespace
# "http://www.w3.org/1999/xhtml" (exact match). Any other value yields an
# "invalid-root-namespace" ParseError to the caller's block.
# The token argument is not used here.
def validate_attribute_value_html_xmlns(token, tag_name, attr_name, attr_value)
  return if attr_value == "http://www.w3.org/1999/xhtml"

  yield({:type => "ParseError",
         :data => "invalid-root-namespace",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name}})
end
|
750
|
+
|
751
|
+
# Element-specific validators that map directly onto a generic check.
# Naming scheme: validate_attribute_value_<tag>_<attr>.

# <base>
alias validate_attribute_value_base_href check_iri
alias validate_attribute_value_base_target check_browsing_context

# <link>
alias validate_attribute_value_link_href check_iri
alias validate_attribute_value_link_rel check_link_relation
alias validate_attribute_value_link_media check_media_query
alias validate_attribute_value_link_hreflang check_lang_code
alias validate_attribute_value_link_type check_mime_type

# XXX <meta> attributes

# <style>
alias validate_attribute_value_style_media check_media_query
alias validate_attribute_value_style_type check_mime_type
alias validate_attribute_value_style_scoped check_boolean

# <blockquote>, <ol>, <li>
alias validate_attribute_value_blockquote_cite check_iri
alias validate_attribute_value_ol_start check_integer
alias validate_attribute_value_li_value check_integer

# XXX need tests from here on

# <a>
alias validate_attribute_value_a_href check_iri
alias validate_attribute_value_a_target check_browsing_context
|
768
|
+
|
769
|
+
# Validates <a ping>: a space-separated list of IRIs.
#
# The value is split with parse_token_list and each entry is checked on its
# own with check_iri; every error check_iri yields is forwarded to the
# caller's block.
def validate_attribute_value_a_ping(token, tag_name, attr_name, attr_value)
  parse_token_list(attr_value).each do |current_value|
    # Check the individual IRI, not the whole space-separated list.
    check_iri(token, tag_name, attr_name, current_value) { |error| yield error }
  end
end
|
777
|
+
|
778
|
+
# More element-specific validators that map directly onto a generic check.

# <a>
alias validate_attribute_value_a_rel check_link_relation
alias validate_attribute_value_a_media check_media_query
alias validate_attribute_value_a_hreflang check_lang_code
alias validate_attribute_value_a_type check_mime_type

# <q>, <time>
alias validate_attribute_value_q_cite check_iri
alias validate_attribute_value_time_datetime check_date_time

# <meter>
alias validate_attribute_value_meter_value check_floating_point_number
alias validate_attribute_value_meter_min check_floating_point_number
alias validate_attribute_value_meter_low check_floating_point_number
alias validate_attribute_value_meter_high check_floating_point_number
alias validate_attribute_value_meter_max check_floating_point_number
alias validate_attribute_value_meter_optimum check_floating_point_number

# <progress>
alias validate_attribute_value_progress_value check_floating_point_number
alias validate_attribute_value_progress_max check_floating_point_number

# <ins>, <del>
alias validate_attribute_value_ins_cite check_iri
alias validate_attribute_value_ins_datetime check_date_time
alias validate_attribute_value_del_cite check_iri
alias validate_attribute_value_del_datetime check_date_time
|
796
|
+
|
797
|
+
##########################################################################
|
798
|
+
# Whole document validation (IDs, etc.)
|
799
|
+
##########################################################################
|
800
|
+
|
801
|
+
# End-of-document check for ID references.
#
# For every token in @things_that_point_to_an_id, looks at its contextmenu
# attribute and yields to the caller's block:
# * "id-does-not-exist" when the value is not in @ids_we_have_known_and_loved;
# * "contextmenu-must-point-to-menu" when the first token in
#   @things_that_define_an_id carrying that id is not a <menu> element.
def eof
  @things_that_point_to_an_id.each do |token|
    tag_name = token.fetch(:name, "").downcase
    # By now html5parser has "normalized" the attrs list into a dict
    # (hooray for obscure side effects!), so hash access is used here.
    attrs_dict = token[:data]
    attr_value = attrs_dict.fetch("contextmenu", "")

    if attr_value && !@ids_we_have_known_and_loved.include?(attr_value)
      yield({:type => "ParseError",
             :data => "id-does-not-exist",
             :datavars => {"tagName" => tag_name,
                           "attributeName" => "contextmenu",
                           "attributeValue" => attr_value}})
    else
      # First element that defines the referenced id; nil ids never match.
      target = @things_that_define_an_id.find do |ref_token|
        id = ref_token.fetch(:data, {}).fetch("id", "")
        id && id == attr_value
      end
      if target && target.fetch(:name, "").downcase != "menu"
        yield({:type => "ParseError",
               :data => "contextmenu-must-point-to-menu"})
      end
    end
  end
end
|
830
|
+
end
|