html5 0.1.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +9 -2
- data/Manifest.txt +61 -2
- data/README +41 -5
- data/Rakefile.rb +22 -6
- data/{parse.rb → bin/html5} +11 -11
- data/lib/core_ext/string.rb +17 -0
- data/lib/html5/constants.rb +228 -0
- data/lib/html5/filters/iso639codes.rb +752 -0
- data/lib/html5/filters/rfc2046.rb +30 -0
- data/lib/html5/filters/rfc3987.rb +89 -0
- data/lib/html5/filters/validator.rb +830 -0
- data/lib/html5/html5parser.rb +25 -25
- data/lib/html5/html5parser/after_body_phase.rb +3 -3
- data/lib/html5/html5parser/after_frameset_phase.rb +3 -4
- data/lib/html5/html5parser/after_head_phase.rb +6 -6
- data/lib/html5/html5parser/before_head_phase.rb +1 -1
- data/lib/html5/html5parser/in_body_phase.rb +54 -48
- data/lib/html5/html5parser/in_caption_phase.rb +7 -6
- data/lib/html5/html5parser/in_cell_phase.rb +3 -3
- data/lib/html5/html5parser/in_column_group_phase.rb +1 -1
- data/lib/html5/html5parser/in_frameset_phase.rb +5 -5
- data/lib/html5/html5parser/in_head_phase.rb +10 -10
- data/lib/html5/html5parser/in_row_phase.rb +4 -2
- data/lib/html5/html5parser/in_select_phase.rb +7 -6
- data/lib/html5/html5parser/in_table_body_phase.rb +8 -5
- data/lib/html5/html5parser/in_table_phase.rb +12 -7
- data/lib/html5/html5parser/initial_phase.rb +5 -6
- data/lib/html5/html5parser/phase.rb +5 -9
- data/lib/html5/html5parser/root_element_phase.rb +1 -2
- data/lib/html5/html5parser/trailing_end_phase.rb +3 -3
- data/lib/html5/inputstream.rb +25 -31
- data/lib/html5/liberalxmlparser.rb +2 -2
- data/lib/html5/sanitizer.rb +6 -6
- data/lib/html5/serializer/htmlserializer.rb +2 -3
- data/lib/html5/sniffer.rb +45 -0
- data/lib/html5/tokenizer.rb +57 -59
- data/lib/html5/treebuilders/rexml.rb +7 -6
- data/lib/html5/treebuilders/simpletree.rb +1 -1
- data/lib/html5/treewalkers/base.rb +8 -0
- data/lib/html5/version.rb +3 -0
- data/testdata/encoding/chardet/test_big5.txt +51 -0
- data/testdata/encoding/test-yahoo-jp.dat +10 -0
- data/testdata/encoding/tests1.dat +394 -0
- data/testdata/encoding/tests2.dat +81 -0
- data/testdata/sanitizer/tests1.dat +416 -0
- data/testdata/serializer/core.test +104 -0
- data/testdata/serializer/injectmeta.test +65 -0
- data/testdata/serializer/optionaltags.test +900 -0
- data/testdata/serializer/options.test +60 -0
- data/testdata/serializer/whitespace.test +51 -0
- data/testdata/sites/google-results.htm +1 -0
- data/testdata/sites/python-ref-import.htm +1 -0
- data/testdata/sites/web-apps-old.htm +1 -0
- data/testdata/sites/web-apps.htm +34275 -0
- data/testdata/sniffer/htmlOrFeed.json +43 -0
- data/testdata/tokenizer/contentModelFlags.test +48 -0
- data/testdata/tokenizer/entities.test +2339 -0
- data/testdata/tokenizer/escapeFlag.test +21 -0
- data/testdata/tokenizer/test1.test +172 -0
- data/testdata/tokenizer/test2.test +129 -0
- data/testdata/tokenizer/test3.test +367 -0
- data/testdata/tokenizer/test4.test +198 -0
- data/testdata/tree-construction/tests1.dat +1950 -0
- data/testdata/tree-construction/tests2.dat +773 -0
- data/testdata/tree-construction/tests3.dat +270 -0
- data/testdata/tree-construction/tests4.dat +60 -0
- data/testdata/tree-construction/tests5.dat +175 -0
- data/testdata/tree-construction/tests6.dat +196 -0
- data/testdata/validator/attributes.test +1035 -0
- data/testdata/validator/base-href-attribute.test +787 -0
- data/testdata/validator/base-target-attribute.test +35 -0
- data/testdata/validator/blockquote-cite-attribute.test +7 -0
- data/testdata/validator/classattribute.test +152 -0
- data/testdata/validator/contenteditableattribute.test +59 -0
- data/testdata/validator/contextmenuattribute.test +115 -0
- data/testdata/validator/dirattribute.test +59 -0
- data/testdata/validator/draggableattribute.test +63 -0
- data/testdata/validator/html-xmlns-attribute.test +23 -0
- data/testdata/validator/idattribute.test +115 -0
- data/testdata/validator/inputattributes.test +2795 -0
- data/testdata/validator/irrelevantattribute.test +63 -0
- data/testdata/validator/langattribute.test +5579 -0
- data/testdata/validator/li-value-attribute.test +7 -0
- data/testdata/validator/link-href-attribute.test +7 -0
- data/testdata/validator/link-hreflang-attribute.test +7 -0
- data/testdata/validator/link-rel-attribute.test +271 -0
- data/testdata/validator/ol-start-attribute.test +7 -0
- data/testdata/validator/starttags.test +375 -0
- data/testdata/validator/style-scoped-attribute.test +7 -0
- data/testdata/validator/tabindexattribute.test +79 -0
- data/tests/preamble.rb +7 -17
- data/tests/test_encoding.rb +1 -1
- data/tests/test_lxp.rb +16 -0
- data/tests/test_parser.rb +2 -2
- data/tests/test_sniffer.rb +27 -0
- data/tests/test_treewalkers.rb +41 -22
- data/tests/test_validator.rb +31 -0
- metadata +65 -6
|
@@ -33,10 +33,9 @@ module HTML5
|
|
|
33
33
|
|
|
34
34
|
def insert_html_element
|
|
35
35
|
element = @tree.createElement('html', {})
|
|
36
|
-
@tree.open_elements
|
|
36
|
+
@tree.open_elements << element
|
|
37
37
|
@tree.document.appendChild(element)
|
|
38
38
|
@parser.phase = @parser.phases[:beforeHead]
|
|
39
39
|
end
|
|
40
|
-
|
|
41
40
|
end
|
|
42
41
|
end
|
|
@@ -15,19 +15,19 @@ module HTML5
|
|
|
15
15
|
end
|
|
16
16
|
|
|
17
17
|
def processCharacters(data)
|
|
18
|
-
parse_error(
|
|
18
|
+
parse_error("expected-eof-but-got-char")
|
|
19
19
|
@parser.phase = @parser.last_phase
|
|
20
20
|
@parser.phase.processCharacters(data)
|
|
21
21
|
end
|
|
22
22
|
|
|
23
23
|
def processStartTag(name, attributes)
|
|
24
|
-
parse_error(
|
|
24
|
+
parse_error("expected-eof-but-got-start-tag", {"name" => name})
|
|
25
25
|
@parser.phase = @parser.last_phase
|
|
26
26
|
@parser.phase.processStartTag(name, attributes)
|
|
27
27
|
end
|
|
28
28
|
|
|
29
29
|
def processEndTag(name)
|
|
30
|
-
parse_error(
|
|
30
|
+
parse_error("expected-eof-but-got-end-tag", {"name" => name})
|
|
31
31
|
@parser.phase = @parser.last_phase
|
|
32
32
|
@parser.phase.processEndTag(name)
|
|
33
33
|
end
|
data/lib/html5/inputstream.rb
CHANGED
|
@@ -60,15 +60,11 @@ module HTML5
|
|
|
60
60
|
if @char_encoding == 'windows-1252'
|
|
61
61
|
@win1252 = true
|
|
62
62
|
elsif @char_encoding != 'utf-8'
|
|
63
|
+
require 'iconv'
|
|
63
64
|
begin
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
@buffer = Iconv.iconv('utf-8', @char_encoding, @buffer).first
|
|
68
|
-
rescue
|
|
69
|
-
@win1252 = true
|
|
70
|
-
end
|
|
71
|
-
rescue LoadError
|
|
65
|
+
@buffer << @raw_stream.read unless @raw_stream.eof?
|
|
66
|
+
@buffer = Iconv.iconv('utf-8', @char_encoding, @buffer).first
|
|
67
|
+
rescue
|
|
72
68
|
@win1252 = true
|
|
73
69
|
end
|
|
74
70
|
end
|
|
@@ -88,12 +84,11 @@ module HTML5
|
|
|
88
84
|
def open_stream(source)
|
|
89
85
|
# Already an IO like object
|
|
90
86
|
if source.respond_to?(:read)
|
|
91
|
-
|
|
87
|
+
source
|
|
92
88
|
else
|
|
93
89
|
# Treat source as a string and wrap in StringIO
|
|
94
|
-
|
|
90
|
+
StringIO.new(source)
|
|
95
91
|
end
|
|
96
|
-
return @stream
|
|
97
92
|
end
|
|
98
93
|
|
|
99
94
|
def detect_encoding
|
|
@@ -138,14 +133,12 @@ module HTML5
|
|
|
138
133
|
encoding = @DEFAULT_ENCODING
|
|
139
134
|
end
|
|
140
135
|
|
|
141
|
-
#Substitute for equivalent
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
if encoding_sub.has_key?(encoding.downcase)
|
|
145
|
-
encoding = encoding_sub[encoding.downcase]
|
|
136
|
+
#Substitute for equivalent encoding
|
|
137
|
+
if 'iso-8859-1' == encoding.downcase
|
|
138
|
+
encoding = 'windows-1252'
|
|
146
139
|
end
|
|
147
140
|
|
|
148
|
-
|
|
141
|
+
encoding
|
|
149
142
|
end
|
|
150
143
|
|
|
151
144
|
# Attempts to detect at BOM at the start of the stream. If
|
|
@@ -153,9 +146,9 @@ module HTML5
|
|
|
153
146
|
# encoding otherwise return nil
|
|
154
147
|
def detect_bom
|
|
155
148
|
bom_dict = {
|
|
156
|
-
"\xef\xbb\xbf"
|
|
157
|
-
"\xff\xfe"
|
|
158
|
-
"\xfe\xff"
|
|
149
|
+
"\xef\xbb\xbf" => 'utf-8',
|
|
150
|
+
"\xff\xfe" => 'utf-16le',
|
|
151
|
+
"\xfe\xff" => 'utf-16be',
|
|
159
152
|
"\xff\xfe\x00\x00" => 'utf-32le',
|
|
160
153
|
"\x00\x00\xfe\xff" => 'utf-32be'
|
|
161
154
|
}
|
|
@@ -198,6 +191,7 @@ module HTML5
|
|
|
198
191
|
end
|
|
199
192
|
end
|
|
200
193
|
|
|
194
|
+
#TODO: huh?
|
|
201
195
|
require 'delegate'
|
|
202
196
|
@raw_stream = SimpleDelegator.new(@raw_stream)
|
|
203
197
|
|
|
@@ -250,7 +244,7 @@ module HTML5
|
|
|
250
244
|
col -= 1
|
|
251
245
|
end
|
|
252
246
|
end
|
|
253
|
-
return [line+1, col]
|
|
247
|
+
return [line + 1, col]
|
|
254
248
|
end
|
|
255
249
|
|
|
256
250
|
# Read one character from the stream or queue if available. Return
|
|
@@ -259,9 +253,9 @@ module HTML5
|
|
|
259
253
|
unless @queue.empty?
|
|
260
254
|
return @queue.shift
|
|
261
255
|
else
|
|
262
|
-
if @tell + 3 > @buffer.length
|
|
256
|
+
if @tell + 3 > @buffer.length && !@raw_stream.eof?
|
|
263
257
|
# read next block
|
|
264
|
-
@buffer = @buffer[@tell
|
|
258
|
+
@buffer = @buffer[@tell..-1] + @raw_stream.read(@NUM_BYTES_BUFFER)
|
|
265
259
|
@tell = 0
|
|
266
260
|
end
|
|
267
261
|
|
|
@@ -269,7 +263,7 @@ module HTML5
|
|
|
269
263
|
@tell += 1
|
|
270
264
|
|
|
271
265
|
case c
|
|
272
|
-
when 0x01
|
|
266
|
+
when 0x01..0x7F
|
|
273
267
|
if c == 0x0D
|
|
274
268
|
# normalize newlines
|
|
275
269
|
@tell += 1 if @buffer[@tell] == 0x0A
|
|
@@ -287,7 +281,7 @@ module HTML5
|
|
|
287
281
|
|
|
288
282
|
c.chr
|
|
289
283
|
|
|
290
|
-
when 0x80
|
|
284
|
+
when 0x80..0xBF
|
|
291
285
|
if !@win1252
|
|
292
286
|
[0xFFFD].pack('U') # invalid utf-8
|
|
293
287
|
elsif c <= 0x9f
|
|
@@ -296,10 +290,11 @@ module HTML5
|
|
|
296
290
|
"\xC2" + c.chr # convert to utf-8
|
|
297
291
|
end
|
|
298
292
|
|
|
299
|
-
when 0xC0
|
|
300
|
-
if
|
|
301
|
-
"\xC3" + (c-64).chr # convert to utf-8
|
|
302
|
-
|
|
293
|
+
when 0xC0..0xFF
|
|
294
|
+
if instance_variables.include?("@win1252") && @win1252
|
|
295
|
+
"\xC3" + (c - 64).chr # convert to utf-8
|
|
296
|
+
# from http://www.w3.org/International/questions/qa-forms-utf-8.en.php
|
|
297
|
+
elsif @buffer[@tell - 1..@tell + 3] =~ /^
|
|
303
298
|
( [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
|
|
304
299
|
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
|
|
305
300
|
| [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
|
|
@@ -315,8 +310,7 @@ module HTML5
|
|
|
315
310
|
end
|
|
316
311
|
|
|
317
312
|
when 0x00
|
|
318
|
-
@errors.push(
|
|
319
|
-
'replaced with U+FFFD')
|
|
313
|
+
@errors.push("null-character")
|
|
320
314
|
[0xFFFD].pack('U') # null characters are invalid
|
|
321
315
|
|
|
322
316
|
else
|
|
@@ -50,7 +50,7 @@ module HTML5
|
|
|
50
50
|
|
|
51
51
|
when :EndTag
|
|
52
52
|
if token[:data]
|
|
53
|
-
parse_error(
|
|
53
|
+
parse_error("attributes-in-end-tag")
|
|
54
54
|
end
|
|
55
55
|
|
|
56
56
|
when :Comment
|
|
@@ -81,7 +81,7 @@ module HTML5
|
|
|
81
81
|
# open and close tags are emitted
|
|
82
82
|
if token[:type] == :EndTag
|
|
83
83
|
if VOID_ELEMENTS.include? token[:name]
|
|
84
|
-
if @tree.open_elements[-1].name != token["name"]
|
|
84
|
+
if @tree.open_elements[-1].name != token["name"]
|
|
85
85
|
token[:type] = :EmptyTag
|
|
86
86
|
token["data"] ||= {}
|
|
87
87
|
end
|
data/lib/html5/sanitizer.rb
CHANGED
|
@@ -110,13 +110,13 @@ module HTML5
|
|
|
110
110
|
def sanitize_token(token)
|
|
111
111
|
case token[:type]
|
|
112
112
|
when :StartTag, :EndTag, :EmptyTag
|
|
113
|
-
if ALLOWED_ELEMENTS.include?(token[:name])
|
|
113
|
+
if self.class.const_get("ALLOWED_ELEMENTS").include?(token[:name])
|
|
114
114
|
if token.has_key? :data
|
|
115
115
|
attrs = Hash[*token[:data].flatten]
|
|
116
|
-
attrs.delete_if { |attr,v| !ALLOWED_ATTRIBUTES.include?(attr) }
|
|
116
|
+
attrs.delete_if { |attr,v| !self.class.const_get("ALLOWED_ATTRIBUTES").include?(attr) }
|
|
117
117
|
ATTR_VAL_IS_URI.each do |attr|
|
|
118
118
|
val_unescaped = CGI.unescapeHTML(attrs[attr].to_s).gsub(/`|[\000-\040\177\s]+|\302[\200-\240]/,'').downcase
|
|
119
|
-
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ and !ALLOWED_PROTOCOLS.include?(val_unescaped.split(':')[0])
|
|
119
|
+
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ and !self.class.const_get("ALLOWED_PROTOCOLS").include?(val_unescaped.split(':')[0])
|
|
120
120
|
attrs.delete attr
|
|
121
121
|
end
|
|
122
122
|
end
|
|
@@ -160,14 +160,14 @@ module HTML5
|
|
|
160
160
|
style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop, val|
|
|
161
161
|
next if val.empty?
|
|
162
162
|
prop.downcase!
|
|
163
|
-
if ALLOWED_CSS_PROPERTIES.include?(prop)
|
|
163
|
+
if self.class.const_get("ALLOWED_CSS_PROPERTIES").include?(prop)
|
|
164
164
|
clean << "#{prop}: #{val};"
|
|
165
165
|
elsif %w[background border margin padding].include?(prop.split('-')[0])
|
|
166
166
|
clean << "#{prop}: #{val};" unless val.split().any? do |keyword|
|
|
167
|
-
!ALLOWED_CSS_KEYWORDS.include?(keyword) and
|
|
167
|
+
!self.class.const_get("ALLOWED_CSS_KEYWORDS").include?(keyword) and
|
|
168
168
|
keyword !~ /^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$/
|
|
169
169
|
end
|
|
170
|
-
elsif ALLOWED_SVG_PROPERTIES.include?(prop)
|
|
170
|
+
elsif self.class.const_get("ALLOWED_SVG_PROPERTIES").include?(prop)
|
|
171
171
|
clean << "#{prop}: #{val};"
|
|
172
172
|
end
|
|
173
173
|
end
|
|
@@ -31,7 +31,7 @@ module HTML5
|
|
|
31
31
|
@inject_meta_charset = true
|
|
32
32
|
|
|
33
33
|
options.each do |name, value|
|
|
34
|
-
next unless
|
|
34
|
+
next unless instance_variables.include?("@#{name}")
|
|
35
35
|
@use_best_quote_char = false if name.to_s == 'quote_char'
|
|
36
36
|
instance_variable_set("@#{name}", value)
|
|
37
37
|
end
|
|
@@ -73,7 +73,7 @@ module HTML5
|
|
|
73
73
|
elsif [:Characters, :SpaceCharacters].include? type
|
|
74
74
|
if type == :SpaceCharacters or in_cdata
|
|
75
75
|
if in_cdata and token[:data].include?("</")
|
|
76
|
-
serialize_error(
|
|
76
|
+
serialize_error("Unexpected </ in CDATA")
|
|
77
77
|
end
|
|
78
78
|
result << token[:data]
|
|
79
79
|
else
|
|
@@ -171,7 +171,6 @@ module HTML5
|
|
|
171
171
|
end
|
|
172
172
|
end
|
|
173
173
|
|
|
174
|
-
def _(string); string; end
|
|
175
174
|
end
|
|
176
175
|
|
|
177
176
|
# Error in serialized tree
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
module HTML5
|
|
2
|
+
module Sniffer
|
|
3
|
+
# 4.7.4
|
|
4
|
+
def html_or_feed str
|
|
5
|
+
s = str[0, 512] # steps 1, 2
|
|
6
|
+
pos = 0
|
|
7
|
+
|
|
8
|
+
while pos < s.length
|
|
9
|
+
case s[pos]
|
|
10
|
+
when 0x09, 0x20, 0x0A, 0x0D # tab, space, LF, CR
|
|
11
|
+
pos += 1
|
|
12
|
+
when 0x3C # "<"
|
|
13
|
+
pos += 1
|
|
14
|
+
if s[pos..pos+2] == "!--" # [0x21, 0x2D, 0x2D]
|
|
15
|
+
pos += 3
|
|
16
|
+
until s[pos..pos+2] == "-->" or pos >= s.length
|
|
17
|
+
pos += 1
|
|
18
|
+
end
|
|
19
|
+
pos += 3
|
|
20
|
+
elsif s[pos] == 0x21 # "!"
|
|
21
|
+
pos += 1
|
|
22
|
+
until s[pos] == 0x3E or pos >= s.length # ">"
|
|
23
|
+
pos += 1
|
|
24
|
+
end
|
|
25
|
+
pos += 1
|
|
26
|
+
elsif s[pos] == 0x3F # "?"
|
|
27
|
+
until s[pos..pos+1] == "?>" or pos >= s.length # [0x3F, 0x3E]
|
|
28
|
+
pos += 1
|
|
29
|
+
end
|
|
30
|
+
pos += 2
|
|
31
|
+
elsif s[pos..pos+2] == "rss" # [0x72, 0x73, 0x73]
|
|
32
|
+
return "application/rss+xml"
|
|
33
|
+
elsif s[pos..pos+3] == "feed" # [0x66, 0x65, 0x65, 0x64]
|
|
34
|
+
return "application/atom+xml"
|
|
35
|
+
elsif s[pos..pos+6] == "rdf:RDF" # [0x72, 0x64, 0x66, 0x3A, 0x52, 0x44, 0x46]
|
|
36
|
+
raise NotImplementedError
|
|
37
|
+
end
|
|
38
|
+
else
|
|
39
|
+
break
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
"text/html"
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|
data/lib/html5/tokenizer.rb
CHANGED
|
@@ -69,7 +69,7 @@ module HTML5
|
|
|
69
69
|
if @current_token[:type] == :StartTag and data == ">"
|
|
70
70
|
@current_token[:type] = :EmptyTag
|
|
71
71
|
else
|
|
72
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
72
|
+
@token_queue << {:type => :ParseError, :data => "incorrectly-placed-solidus"}
|
|
73
73
|
end
|
|
74
74
|
|
|
75
75
|
# The character we just consumed need to be put back on the stack so it
|
|
@@ -107,12 +107,12 @@ module HTML5
|
|
|
107
107
|
charAsInt = char_stack.join('').to_i(radix)
|
|
108
108
|
|
|
109
109
|
if charAsInt == 13
|
|
110
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
110
|
+
@token_queue << {:type => :ParseError, :data => "incorrect-cr-newline-entity"}
|
|
111
111
|
charAsInt = 10
|
|
112
112
|
elsif (128..159).include? charAsInt
|
|
113
113
|
# If the integer is between 127 and 160 (so 128 and bigger and 159
|
|
114
114
|
# and smaller) we need to do the "windows trick".
|
|
115
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
115
|
+
@token_queue << {:type => :ParseError, :data => "illegal-windows-1252-entity"}
|
|
116
116
|
|
|
117
117
|
charAsInt = ENTITIES_WINDOWS1252[charAsInt - 128]
|
|
118
118
|
end
|
|
@@ -121,13 +121,13 @@ module HTML5
|
|
|
121
121
|
char = [charAsInt].pack('U')
|
|
122
122
|
else
|
|
123
123
|
char = [0xFFFD].pack('U')
|
|
124
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
124
|
+
@token_queue << {:type => :ParseError, :data => "cant-convert-numeric-entity", :datavars => {"charAsInt" => charAsInt}}
|
|
125
125
|
end
|
|
126
126
|
|
|
127
127
|
# Discard the ; if present. Otherwise, put it back on the queue and
|
|
128
128
|
# invoke parse_error on parser.
|
|
129
129
|
if c != ";"
|
|
130
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
130
|
+
@token_queue << {:type => :ParseError, :data => "numeric-entity-without-semicolon"}
|
|
131
131
|
@stream.unget(c)
|
|
132
132
|
end
|
|
133
133
|
|
|
@@ -147,7 +147,7 @@ module HTML5
|
|
|
147
147
|
# back in the queue
|
|
148
148
|
char_stack = char_stack[0...char_stack.index(:EOF)]
|
|
149
149
|
@stream.unget(char_stack)
|
|
150
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
150
|
+
@token_queue << {:type => :ParseError, :data => "expected-numeric-entity-but-got-eof"}
|
|
151
151
|
else
|
|
152
152
|
if char_stack[1].downcase == "x" and HEX_DIGITS.include? char_stack[2]
|
|
153
153
|
# Hexadecimal entity detected.
|
|
@@ -160,7 +160,7 @@ module HTML5
|
|
|
160
160
|
else
|
|
161
161
|
# No number entity detected.
|
|
162
162
|
@stream.unget(char_stack)
|
|
163
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
163
|
+
@token_queue << {:type => :ParseError, :data => "expected-numeric-entity"}
|
|
164
164
|
end
|
|
165
165
|
end
|
|
166
166
|
else
|
|
@@ -196,10 +196,10 @@ module HTML5
|
|
|
196
196
|
# Check whether or not the last character returned can be
|
|
197
197
|
# discarded or needs to be put back.
|
|
198
198
|
if entityName[-1] != ?;
|
|
199
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
199
|
+
@token_queue << {:type => :ParseError, :data => "named-entity-without-semicolon"}
|
|
200
200
|
end
|
|
201
201
|
|
|
202
|
-
if
|
|
202
|
+
if entityName[-1] != ";" and from_attribute and
|
|
203
203
|
(ASCII_LETTERS.include?(char_stack[entityName.length]) or
|
|
204
204
|
DIGITS.include?(char_stack[entityName.length]))
|
|
205
205
|
@stream.unget(char_stack)
|
|
@@ -208,7 +208,7 @@ module HTML5
|
|
|
208
208
|
@stream.unget(char_stack[entityName.length..-1])
|
|
209
209
|
end
|
|
210
210
|
else
|
|
211
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
211
|
+
@token_queue << {:type => :ParseError, :data => "expected-named-entity"}
|
|
212
212
|
@stream.unget(char_stack)
|
|
213
213
|
end
|
|
214
214
|
end
|
|
@@ -217,7 +217,7 @@ module HTML5
|
|
|
217
217
|
|
|
218
218
|
# This method replaces the need for "entityInAttributeValueState".
|
|
219
219
|
def process_entity_in_attribute
|
|
220
|
-
entity = consume_entity(
|
|
220
|
+
entity = consume_entity()
|
|
221
221
|
if entity
|
|
222
222
|
@current_token[:data][-1][1] += entity
|
|
223
223
|
else
|
|
@@ -309,19 +309,18 @@ module HTML5
|
|
|
309
309
|
elsif data == ">"
|
|
310
310
|
# XXX In theory it could be something besides a tag name. But
|
|
311
311
|
# do we really care?
|
|
312
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
312
|
+
@token_queue << {:type => :ParseError, :data => "expected-tag-name-but-got-right-bracket"}
|
|
313
313
|
@token_queue << {:type => :Characters, :data => "<>"}
|
|
314
314
|
@state = :data_state
|
|
315
315
|
elsif data == "?"
|
|
316
316
|
# XXX In theory it could be something besides a tag name. But
|
|
317
317
|
# do we really care?
|
|
318
|
-
@token_queue.push({:type => :ParseError, :data =>
|
|
319
|
-
"support processing instructions).")})
|
|
318
|
+
@token_queue.push({:type => :ParseError, :data => "expected-tag-name-but-got-question-mark"})
|
|
320
319
|
@stream.unget(data)
|
|
321
320
|
@state = :bogus_comment_state
|
|
322
321
|
else
|
|
323
322
|
# XXX
|
|
324
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
323
|
+
@token_queue << {:type => :ParseError, :data => "expected-tag-name"}
|
|
325
324
|
@token_queue << {:type => :Characters, :data => "<"}
|
|
326
325
|
@stream.unget(data)
|
|
327
326
|
@state = :data_state
|
|
@@ -382,18 +381,18 @@ module HTML5
|
|
|
382
381
|
|
|
383
382
|
data = @stream.char
|
|
384
383
|
if data == :EOF
|
|
385
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
384
|
+
@token_queue << {:type => :ParseError, :data => "expected-closing-tag-but-got-eof"}
|
|
386
385
|
@token_queue << {:type => :Characters, :data => "</"}
|
|
387
386
|
@state = :data_state
|
|
388
387
|
elsif ASCII_LETTERS.include? data
|
|
389
388
|
@current_token = {:type => :EndTag, :name => data, :data => []}
|
|
390
389
|
@state = :tag_name_state
|
|
391
390
|
elsif data == ">"
|
|
392
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
391
|
+
@token_queue << {:type => :ParseError, :data => "expected-closing-tag-but-got-right-bracket"}
|
|
393
392
|
@state = :data_state
|
|
394
393
|
else
|
|
395
394
|
# XXX data can be _'_...
|
|
396
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
395
|
+
@token_queue << {:type => :ParseError, :data => "expected-closing-tag-but-got-char", :datavars => {:data => data}}
|
|
397
396
|
@stream.unget(data)
|
|
398
397
|
@state = :bogus_comment_state
|
|
399
398
|
end
|
|
@@ -406,7 +405,7 @@ module HTML5
|
|
|
406
405
|
if SPACE_CHARACTERS.include? data
|
|
407
406
|
@state = :before_attribute_name_state
|
|
408
407
|
elsif data == :EOF
|
|
409
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
408
|
+
@token_queue << {:type => :ParseError, :data => "eof-in-tag-name"}
|
|
410
409
|
emit_current_token
|
|
411
410
|
elsif ASCII_LETTERS.include? data
|
|
412
411
|
@current_token[:name] += data + @stream.chars_until(ASCII_LETTERS, true)
|
|
@@ -426,7 +425,7 @@ module HTML5
|
|
|
426
425
|
if SPACE_CHARACTERS.include? data
|
|
427
426
|
@stream.chars_until(SPACE_CHARACTERS, true)
|
|
428
427
|
elsif data == :EOF
|
|
429
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
428
|
+
@token_queue << {:type => :ParseError, :data => "expected-attribute-name-but-got-eof"}
|
|
430
429
|
emit_current_token
|
|
431
430
|
elsif ASCII_LETTERS.include? data
|
|
432
431
|
@current_token[:data].push([data, ""])
|
|
@@ -449,7 +448,7 @@ module HTML5
|
|
|
449
448
|
if data == "="
|
|
450
449
|
@state = :before_attribute_value_state
|
|
451
450
|
elsif data == :EOF
|
|
452
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
451
|
+
@token_queue << {:type => :ParseError, :data => "eof-in-attribute-name"}
|
|
453
452
|
@state = :data_state
|
|
454
453
|
emitToken = true
|
|
455
454
|
elsif ASCII_LETTERS.include? data
|
|
@@ -479,7 +478,7 @@ module HTML5
|
|
|
479
478
|
end
|
|
480
479
|
@current_token[:data][0...-1].each {|name,value|
|
|
481
480
|
if @current_token[:data].last.first == name
|
|
482
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
481
|
+
@token_queue << {:type => :ParseError, :data => "duplicate-attribute"}
|
|
483
482
|
break # don't report an error more than once
|
|
484
483
|
end
|
|
485
484
|
}
|
|
@@ -498,7 +497,7 @@ module HTML5
|
|
|
498
497
|
elsif data == ">"
|
|
499
498
|
emit_current_token
|
|
500
499
|
elsif data == :EOF
|
|
501
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
500
|
+
@token_queue << {:type => :ParseError, :data => "expected-end-of-tag-but-got-eof"}
|
|
502
501
|
emit_current_token
|
|
503
502
|
elsif ASCII_LETTERS.include? data
|
|
504
503
|
@current_token[:data].push([data, ""])
|
|
@@ -527,7 +526,7 @@ module HTML5
|
|
|
527
526
|
elsif data == ">"
|
|
528
527
|
emit_current_token
|
|
529
528
|
elsif data == :EOF
|
|
530
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
529
|
+
@token_queue << {:type => :ParseError, :data => "expected-attribute-value-but-got-eof"}
|
|
531
530
|
emit_current_token
|
|
532
531
|
else
|
|
533
532
|
@current_token[:data][-1][1] += data
|
|
@@ -543,7 +542,7 @@ module HTML5
|
|
|
543
542
|
elsif data == "&"
|
|
544
543
|
process_entity_in_attribute
|
|
545
544
|
elsif data == :EOF
|
|
546
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
545
|
+
@token_queue << {:type => :ParseError, :data => "eof-in-attribute-value-double-quote"}
|
|
547
546
|
emit_current_token
|
|
548
547
|
else
|
|
549
548
|
@current_token[:data][-1][1] += data + @stream.chars_until(["\"", "&"])
|
|
@@ -558,7 +557,7 @@ module HTML5
|
|
|
558
557
|
elsif data == "&"
|
|
559
558
|
process_entity_in_attribute
|
|
560
559
|
elsif data == :EOF
|
|
561
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
560
|
+
@token_queue << {:type => :ParseError, :data => "eof-in-attribute-value-single-quote"}
|
|
562
561
|
emit_current_token
|
|
563
562
|
else
|
|
564
563
|
@current_token[:data][-1][1] += data +\
|
|
@@ -576,7 +575,7 @@ module HTML5
|
|
|
576
575
|
elsif data == ">"
|
|
577
576
|
emit_current_token
|
|
578
577
|
elsif data == :EOF
|
|
579
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
578
|
+
@token_queue << {:type => :ParseError, :data => "eof-in-attribute-value-no-quotes"}
|
|
580
579
|
emit_current_token
|
|
581
580
|
else
|
|
582
581
|
@current_token[:data][-1][1] += data + @stream.chars_until(["&", ">","<"] + SPACE_CHARACTERS)
|
|
@@ -609,7 +608,7 @@ module HTML5
|
|
|
609
608
|
@current_token = {:type => :Doctype, :name => "", :publicId => nil, :systemId => nil, :correct => true}
|
|
610
609
|
@state = :doctype_state
|
|
611
610
|
else
|
|
612
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
611
|
+
@token_queue << {:type => :ParseError, :data => "expected-dashes-or-doctype"}
|
|
613
612
|
@stream.unget(char_stack)
|
|
614
613
|
@state = :bogus_comment_state
|
|
615
614
|
end
|
|
@@ -622,11 +621,11 @@ module HTML5
|
|
|
622
621
|
if data == "-"
|
|
623
622
|
@state = :comment_start_dash_state
|
|
624
623
|
elsif data == ">"
|
|
625
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
624
|
+
@token_queue << {:type => :ParseError, :data => "incorrect-comment"}
|
|
626
625
|
@token_queue << @current_token
|
|
627
626
|
@state = :data_state
|
|
628
627
|
elsif data == :EOF
|
|
629
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
628
|
+
@token_queue << {:type => :ParseError, :data => "eof-in-comment"}
|
|
630
629
|
@token_queue << @current_token
|
|
631
630
|
@state = :data_state
|
|
632
631
|
else
|
|
@@ -641,11 +640,11 @@ module HTML5
|
|
|
641
640
|
if data == "-"
|
|
642
641
|
@state = :comment_end_state
|
|
643
642
|
elsif data == ">"
|
|
644
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
643
|
+
@token_queue << {:type => :ParseError, :data => "incorrect-comment"}
|
|
645
644
|
@token_queue << @current_token
|
|
646
645
|
@state = :data_state
|
|
647
646
|
elsif data == :EOF
|
|
648
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
647
|
+
@token_queue << {:type => :ParseError, :data => "eof-in-comment"}
|
|
649
648
|
@token_queue << @current_token
|
|
650
649
|
@state = :data_state
|
|
651
650
|
else
|
|
@@ -660,7 +659,7 @@ module HTML5
|
|
|
660
659
|
if data == "-"
|
|
661
660
|
@state = :comment_end_dash_state
|
|
662
661
|
elsif data == :EOF
|
|
663
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
662
|
+
@token_queue << {:type => :ParseError, :data => "eof-in-comment"}
|
|
664
663
|
@token_queue << @current_token
|
|
665
664
|
@state = :data_state
|
|
666
665
|
else
|
|
@@ -674,7 +673,7 @@ module HTML5
|
|
|
674
673
|
if data == "-"
|
|
675
674
|
@state = :comment_end_state
|
|
676
675
|
elsif data == :EOF
|
|
677
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
676
|
+
@token_queue << {:type => :ParseError, :data => "eof-in-comment-end-dash"}
|
|
678
677
|
@token_queue << @current_token
|
|
679
678
|
@state = :data_state
|
|
680
679
|
else
|
|
@@ -694,15 +693,15 @@ module HTML5
|
|
|
694
693
|
@token_queue << @current_token
|
|
695
694
|
@state = :data_state
|
|
696
695
|
elsif data == "-"
|
|
697
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
696
|
+
@token_queue << {:type => :ParseError, :data => "unexpected-dash-after-double-dash-in-comment"}
|
|
698
697
|
@current_token[:data] += data
|
|
699
698
|
elsif data == :EOF
|
|
700
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
699
|
+
@token_queue << {:type => :ParseError, :data => "eof-in-comment-double-dash"}
|
|
701
700
|
@token_queue << @current_token
|
|
702
701
|
@state = :data_state
|
|
703
702
|
else
|
|
704
703
|
# XXX
|
|
705
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
704
|
+
@token_queue << {:type => :ParseError, :data => "unexpected-char-in-comment"}
|
|
706
705
|
@current_token[:data] += "--" + data
|
|
707
706
|
@state = :comment_state
|
|
708
707
|
end
|
|
@@ -714,7 +713,7 @@ module HTML5
|
|
|
714
713
|
if SPACE_CHARACTERS.include? data
|
|
715
714
|
@state = :before_doctype_name_state
|
|
716
715
|
else
|
|
717
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
716
|
+
@token_queue << {:type => :ParseError, :data => "need-space-after-doctype"}
|
|
718
717
|
@stream.unget(data)
|
|
719
718
|
@state = :before_doctype_name_state
|
|
720
719
|
end
|
|
@@ -725,12 +724,12 @@ module HTML5
|
|
|
725
724
|
data = @stream.char
|
|
726
725
|
if SPACE_CHARACTERS.include? data
|
|
727
726
|
elsif data == ">"
|
|
728
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
727
|
+
@token_queue << {:type => :ParseError, :data => "expected-doctype-name-but-got-right-bracket"}
|
|
729
728
|
@current_token[:correct] = false
|
|
730
729
|
@token_queue << @current_token
|
|
731
730
|
@state = :data_state
|
|
732
731
|
elsif data == :EOF
|
|
733
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
732
|
+
@token_queue << {:type => :ParseError, :data => "expected-doctype-name-but-got-eof"}
|
|
734
733
|
@current_token[:correct] = false
|
|
735
734
|
@token_queue << @current_token
|
|
736
735
|
@state = :data_state
|
|
@@ -749,7 +748,7 @@ module HTML5
|
|
|
749
748
|
@token_queue << @current_token
|
|
750
749
|
@state = :data_state
|
|
751
750
|
elsif data == :EOF
|
|
752
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
751
|
+
@token_queue << {:type => :ParseError, :data => "eof-in-doctype-name"}
|
|
753
752
|
@current_token[:correct] = false
|
|
754
753
|
@token_queue << @current_token
|
|
755
754
|
@state = :data_state
|
|
@@ -769,7 +768,7 @@ module HTML5
|
|
|
769
768
|
elsif data == :EOF
|
|
770
769
|
@current_token[:correct] = false
|
|
771
770
|
@stream.unget(data)
|
|
772
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
771
|
+
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
|
|
773
772
|
@token_queue << @current_token
|
|
774
773
|
@state = :data_state
|
|
775
774
|
else
|
|
@@ -782,7 +781,7 @@ module HTML5
|
|
|
782
781
|
@state = :before_doctype_system_identifier_state
|
|
783
782
|
else
|
|
784
783
|
@stream.unget(char_stack)
|
|
785
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
784
|
+
@token_queue << {:type => :ParseError, :data => "expected-space-or-right-bracket-in-doctype", "datavars" => {"data" => data}}
|
|
786
785
|
@state = :bogus_doctype_state
|
|
787
786
|
end
|
|
788
787
|
end
|
|
@@ -800,17 +799,17 @@ module HTML5
|
|
|
800
799
|
@current_token[:publicId] = ""
|
|
801
800
|
@state = :doctype_public_identifier_single_quoted_state
|
|
802
801
|
elsif data == ">"
|
|
803
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
802
|
+
@token_queue << {:type => :ParseError, :data => "unexpected-end-of-doctype"}
|
|
804
803
|
@current_token[:correct] = false
|
|
805
804
|
@token_queue << @current_token
|
|
806
805
|
@state = :data_state
|
|
807
806
|
elsif data == :EOF
|
|
808
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
807
|
+
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
|
|
809
808
|
@current_token[:correct] = false
|
|
810
809
|
@token_queue << @current_token
|
|
811
810
|
@state = :data_state
|
|
812
811
|
else
|
|
813
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
812
|
+
@token_queue << {:type => :ParseError, :data => "unexpected-char-in-doctype"}
|
|
814
813
|
@state = :bogus_doctype_state
|
|
815
814
|
end
|
|
816
815
|
|
|
@@ -822,7 +821,7 @@ module HTML5
|
|
|
822
821
|
if data == "\""
|
|
823
822
|
@state = :after_doctype_public_identifier_state
|
|
824
823
|
elsif data == :EOF
|
|
825
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
824
|
+
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
|
|
826
825
|
@current_token[:correct] = false
|
|
827
826
|
@token_queue << @current_token
|
|
828
827
|
@state = :data_state
|
|
@@ -837,7 +836,7 @@ module HTML5
|
|
|
837
836
|
if data == "'"
|
|
838
837
|
@state = :after_doctype_public_identifier_state
|
|
839
838
|
elsif data == :EOF
|
|
840
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
839
|
+
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
|
|
841
840
|
@current_token[:correct] = false
|
|
842
841
|
@token_queue << @current_token
|
|
843
842
|
@state = :data_state
|
|
@@ -860,12 +859,12 @@ module HTML5
|
|
|
860
859
|
@token_queue << @current_token
|
|
861
860
|
@state = :data_state
|
|
862
861
|
elsif data == :EOF
|
|
863
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
862
|
+
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
|
|
864
863
|
@current_token[:correct] = false
|
|
865
864
|
@token_queue << @current_token
|
|
866
865
|
@state = :data_state
|
|
867
866
|
else
|
|
868
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
867
|
+
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
|
|
869
868
|
@state = :bogus_doctype_state
|
|
870
869
|
end
|
|
871
870
|
return true
|
|
@@ -881,17 +880,17 @@ module HTML5
|
|
|
881
880
|
@current_token[:systemId] = ""
|
|
882
881
|
@state = :doctype_system_identifier_single_quoted_state
|
|
883
882
|
elsif data == ">"
|
|
884
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
883
|
+
@token_queue << {:type => :ParseError, :data => "unexpected-char-in-doctype"}
|
|
885
884
|
@current_token[:correct] = false
|
|
886
885
|
@token_queue << @current_token
|
|
887
886
|
@state = :data_state
|
|
888
887
|
elsif data == :EOF
|
|
889
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
888
|
+
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
|
|
890
889
|
@current_token[:correct] = false
|
|
891
890
|
@token_queue << @current_token
|
|
892
891
|
@state = :data_state
|
|
893
892
|
else
|
|
894
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
893
|
+
@token_queue << {:type => :ParseError, :data => "unexpected-char-in-doctype"}
|
|
895
894
|
@state = :bogus_doctype_state
|
|
896
895
|
end
|
|
897
896
|
return true
|
|
@@ -902,7 +901,7 @@ module HTML5
|
|
|
902
901
|
if data == "\""
|
|
903
902
|
@state = :after_doctype_system_identifier_state
|
|
904
903
|
elsif data == :EOF
|
|
905
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
904
|
+
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
|
|
906
905
|
@current_token[:correct] = false
|
|
907
906
|
@token_queue << @current_token
|
|
908
907
|
@state = :data_state
|
|
@@ -917,7 +916,7 @@ module HTML5
|
|
|
917
916
|
if data == "'"
|
|
918
917
|
@state = :after_doctype_system_identifier_state
|
|
919
918
|
elsif data == :EOF
|
|
920
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
919
|
+
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
|
|
921
920
|
@current_token[:correct] = false
|
|
922
921
|
@token_queue << @current_token
|
|
923
922
|
@state = :data_state
|
|
@@ -934,12 +933,12 @@ module HTML5
|
|
|
934
933
|
@token_queue << @current_token
|
|
935
934
|
@state = :data_state
|
|
936
935
|
elsif data == :EOF
|
|
937
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
936
|
+
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
|
|
938
937
|
@current_token[:correct] = false
|
|
939
938
|
@token_queue << @current_token
|
|
940
939
|
@state = :data_state
|
|
941
940
|
else
|
|
942
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
941
|
+
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
|
|
943
942
|
@state = :bogus_doctype_state
|
|
944
943
|
end
|
|
945
944
|
return true
|
|
@@ -954,7 +953,7 @@ module HTML5
|
|
|
954
953
|
elsif data == :EOF
|
|
955
954
|
# XXX EMIT
|
|
956
955
|
@stream.unget(data)
|
|
957
|
-
@token_queue << {:type => :ParseError, :data =>
|
|
956
|
+
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
|
|
958
957
|
@current_token[:correct] = false
|
|
959
958
|
@token_queue << @current_token
|
|
960
959
|
@state = :data_state
|
|
@@ -962,7 +961,6 @@ module HTML5
|
|
|
962
961
|
return true
|
|
963
962
|
end
|
|
964
963
|
|
|
965
|
-
def _(string); string; end
|
|
966
964
|
end
|
|
967
965
|
|
|
968
966
|
end
|