html5 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +3 -0
- data/Manifest.txt +58 -0
- data/README +9 -0
- data/Rakefile.rb +17 -0
- data/lib/html5/constants.rb +818 -0
- data/lib/html5/filters/base.rb +10 -0
- data/lib/html5/filters/inject_meta_charset.rb +82 -0
- data/lib/html5/filters/optionaltags.rb +198 -0
- data/lib/html5/filters/sanitizer.rb +15 -0
- data/lib/html5/filters/whitespace.rb +36 -0
- data/lib/html5/html5parser/after_body_phase.rb +46 -0
- data/lib/html5/html5parser/after_frameset_phase.rb +34 -0
- data/lib/html5/html5parser/after_head_phase.rb +50 -0
- data/lib/html5/html5parser/before_head_phase.rb +41 -0
- data/lib/html5/html5parser/in_body_phase.rb +607 -0
- data/lib/html5/html5parser/in_caption_phase.rb +68 -0
- data/lib/html5/html5parser/in_cell_phase.rb +78 -0
- data/lib/html5/html5parser/in_column_group_phase.rb +55 -0
- data/lib/html5/html5parser/in_frameset_phase.rb +57 -0
- data/lib/html5/html5parser/in_head_phase.rb +138 -0
- data/lib/html5/html5parser/in_row_phase.rb +87 -0
- data/lib/html5/html5parser/in_select_phase.rb +84 -0
- data/lib/html5/html5parser/in_table_body_phase.rb +83 -0
- data/lib/html5/html5parser/in_table_phase.rb +110 -0
- data/lib/html5/html5parser/initial_phase.rb +134 -0
- data/lib/html5/html5parser/phase.rb +158 -0
- data/lib/html5/html5parser/root_element_phase.rb +42 -0
- data/lib/html5/html5parser/trailing_end_phase.rb +35 -0
- data/lib/html5/html5parser.rb +248 -0
- data/lib/html5/inputstream.rb +654 -0
- data/lib/html5/liberalxmlparser.rb +158 -0
- data/lib/html5/sanitizer.rb +188 -0
- data/lib/html5/serializer/htmlserializer.rb +180 -0
- data/lib/html5/serializer/xhtmlserializer.rb +20 -0
- data/lib/html5/serializer.rb +2 -0
- data/lib/html5/tokenizer.rb +968 -0
- data/lib/html5/treebuilders/base.rb +334 -0
- data/lib/html5/treebuilders/hpricot.rb +231 -0
- data/lib/html5/treebuilders/rexml.rb +208 -0
- data/lib/html5/treebuilders/simpletree.rb +185 -0
- data/lib/html5/treebuilders.rb +24 -0
- data/lib/html5/treewalkers/base.rb +154 -0
- data/lib/html5/treewalkers/hpricot.rb +48 -0
- data/lib/html5/treewalkers/rexml.rb +48 -0
- data/lib/html5/treewalkers/simpletree.rb +48 -0
- data/lib/html5/treewalkers.rb +26 -0
- data/lib/html5.rb +13 -0
- data/parse.rb +217 -0
- data/tests/preamble.rb +82 -0
- data/tests/test_encoding.rb +35 -0
- data/tests/test_lxp.rb +263 -0
- data/tests/test_parser.rb +68 -0
- data/tests/test_sanitizer.rb +142 -0
- data/tests/test_serializer.rb +68 -0
- data/tests/test_stream.rb +62 -0
- data/tests/test_tokenizer.rb +94 -0
- data/tests/test_treewalkers.rb +116 -0
- data/tests/tokenizer_test_parser.rb +63 -0
- metadata +120 -0
@@ -0,0 +1,82 @@
|
|
1
|
+
require 'html5/filters/base'
|
2
|
+
|
3
|
+
module HTML5
  module Filters
    # Token-stream filter that makes the charset declared in the document's
    # head agree with the encoding given to the filter.
    #
    # An existing <meta charset=...> attribute, or the content attribute of a
    # <meta http-equiv="content-type" ...>, is rewritten in place. When the
    # head declares no charset, a <meta charset=...> token is inserted right
    # after the <head> start tag.
    class InjectMetaCharset < Base
      # source   - upstream token source; handed to Base and read back through
      #            __getobj__.
      # encoding - encoding name to declare, or nil to leave the stream
      #            untouched.
      def initialize(source, encoding)
        super(source)
        @encoding = encoding
      end

      # Yields every token of the wrapped stream with the charset declaration
      # fixed up.
      #
      # Tokens seen from the <head> start tag onwards are held back in a queue
      # until </head>, so that a <meta> can still be inserted directly after
      # the <head> start tag once it is known that the head does not declare
      # a charset itself.
      def each
        # Nothing to declare: pass the tokens through verbatim instead of
        # overwriting an existing charset value with nil.
        return __getobj__.each { |token| yield token } if @encoding.nil?

        state = :pre_head
        meta_found = false
        pending = []

        result = __getobj__.each do |token|
          case token[:type]
          when :StartTag
            state = :in_head if token[:name].downcase == 'head'

          when :EmptyTag
            tag = token[:name].downcase
            if tag == 'meta'
              has_http_equiv_content_type = false
              content_index = -1
              token[:data].each_with_index do |(name, value), i|
                if name.downcase == 'charset'
                  # replace charset with actual encoding
                  token[:data][i] = ['charset', @encoding]
                  meta_found = true
                  break
                elsif name == 'http-equiv' && value.downcase == 'content-type'
                  has_http_equiv_content_type = true
                elsif name == 'content'
                  content_index = i
                end
              end

              # No charset declared so far: rewrite the content attribute of
              # an http-equiv content-type declaration instead.
              if !meta_found && has_http_equiv_content_type && content_index >= 0
                token[:data][content_index][1] = 'text/html; charset=%s' % @encoding
                meta_found = true
              end

            elsif tag == 'head' && !meta_found
              # insert meta into empty head
              yield :type => :StartTag, :name => 'head', :data => token[:data]
              yield meta_charset_token
              yield :type => :EndTag, :name => 'head'
              meta_found = true
              next
            end

          when :EndTag
            if token[:name].downcase == 'head' && pending.any?
              # insert meta into head (if necessary) and flush pending queue
              flush_head(pending, meta_found) { |queued| yield queued }
              meta_found = true
              state = :post_head
            end
          end

          if state == :in_head
            pending << token
          else
            yield token
          end
        end

        # The stream ended while still inside the head (no </head> token was
        # seen): emit the held-back tokens instead of silently dropping them.
        flush_head(pending, meta_found) { |queued| yield queued } if pending.any?

        result
      end

      private

      # The <meta charset=...> token inserted into a head that lacks one.
      def meta_charset_token
        { :type => :EmptyTag, :name => 'meta', :data => [['charset', @encoding]] }
      end

      # Empties the pending queue into the given block. The first queued token
      # is the <head> start tag (queueing starts with it); the meta token is
      # emitted right after it unless a charset declaration was already found.
      def flush_head(pending, meta_found)
        yield pending.shift
        yield meta_charset_token unless meta_found
        yield pending.shift until pending.empty?
      end
    end
  end
end
|
@@ -0,0 +1,198 @@
|
|
1
|
+
require 'html5/constants'
|
2
|
+
require 'html5/filters/base'
|
3
|
+
|
4
|
+
module HTML5
  module Filters

    # Token-stream filter that drops the start and end tags which the rules
    # quoted below allow to be omitted. Whether a tag can go is decided by
    # looking at the neighbouring tokens only.
    class OptionalTagFilter < Base
      # Walks the wrapped stream with a three-token window and yields
      # (previous, current, next) once per token; previous and next are nil
      # at the respective ends of the stream.
      def slider
        previous1 = previous2 = nil
        __getobj__.each do |token|
          yield previous2, previous1, token unless previous1.nil?
          previous2 = previous1
          previous1 = token
        end
        # Emit the final token, which has no successor. An empty stream has
        # no final token, so nothing is yielded (yielding three nils here
        # would make #each fail on token[:type]).
        yield previous2, previous1, nil unless previous1.nil?
      end

      # Yields every token except omissible start and end tags. A start tag
      # is only ever dropped when it carries no attributes.
      def each
        slider do |previous, token, nexttok|
          omit =
            case token[:type]
            when :StartTag
              token[:data].empty? && is_optional_start(token[:name], previous, nexttok)
            when :EndTag
              is_optional_end(token[:name], nexttok)
            else
              false
            end
          yield token unless omit
        end
      end

      # Returns true when the start tag named tagname may be omitted.
      #
      # tagname  - name of the start tag under consideration
      # previous - token preceding the start tag, or nil
      # nexttok  - token following the start tag, or nil
      def is_optional_start(tagname, previous, nexttok)
        type = nexttok ? nexttok[:type] : nil
        case tagname
        when 'html'
          # An html element's start tag may be omitted if the first thing
          # inside the html element is not a space character or a comment.
          ![:Comment, :SpaceCharacters].include?(type)
        when 'head'
          # A head element's start tag may be omitted if the first thing
          # inside the head element is an element.
          type == :StartTag
        when 'body'
          # A body element's start tag may be omitted if the first thing
          # inside the body element is not a space character or a comment,
          # except if the first thing inside the body element is a script
          # or style element and the node immediately preceding the body
          # element is a head element whose end tag has been omitted.
          if [:Comment, :SpaceCharacters].include?(type)
            false
          elsif type == :StartTag
            # XXX: we do not look at the preceding event, so we never omit
            # the body element's start tag if it's followed by a script or
            # a style element.
            !%w[script style].include?(nexttok[:name])
          else
            true
          end
        when 'colgroup'
          # A colgroup element's start tag may be omitted if the first thing
          # inside the colgroup element is a col element, and if the element
          # is not immediately preceded by another colgroup element whose
          # end tag has been omitted.
          # XXX: we do not look at the preceding event, so instead we never
          # omit the colgroup element's end tag when it is immediately
          # followed by another colgroup element. See is_optional_end.
          type == :StartTag && nexttok[:name] == 'col'
        when 'tbody'
          # A tbody element's start tag may be omitted if the first thing
          # inside the tbody element is a tr element, and if the element is
          # not immediately preceded by a tbody, thead, or tfoot element
          # whose end tag has been omitted.
          return false unless type == :StartTag

          # The end tag of a preceding tbody, thead or tfoot may have been
          # dropped by is_optional_end, so this start tag has to stay.
          if previous && previous[:type] == :EndTag &&
             %w[tbody thead tfoot].include?(previous[:name])
            return false
          end

          nexttok[:name] == 'tr'
        else
          false
        end
      end

      # Returns true when the end tag named tagname may be omitted.
      #
      # tagname - name of the end tag under consideration
      # nexttok - token following the end tag, or nil at the end of the stream
      def is_optional_end(tagname, nexttok)
        type = nexttok ? nexttok[:type] : nil
        # Name of the element that starts right after this end tag, if any.
        follower = type == :StartTag ? nexttok[:name] : nil
        # An end tag or the end of the stream comes next, i.e. there is no
        # more content in the parent element.
        parent_done = (type == :EndTag || type.nil?)

        case tagname
        when 'html', 'head', 'body'
          # An html element's end tag may be omitted if the html element
          # is not immediately followed by a space character or a comment.
          # The same test is applied to head and body.
          ![:Comment, :SpaceCharacters].include?(type)
        when 'li', 'optgroup', 'option', 'tr'
          # A li element's end tag may be omitted if the li element is
          # immediately followed by another li element or if there is
          # no more content in the parent element.
          # An optgroup element's end tag may be omitted if the optgroup
          # element is immediately followed by another optgroup element,
          # or if there is no more content in the parent element.
          # An option element's end tag may be omitted if the option
          # element is immediately followed by another option element,
          # or if there is no more content in the parent element.
          # A tr element's end tag may be omitted if the tr element is
          # immediately followed by another tr element, or if there is
          # no more content in the parent element.
          type == :StartTag ? follower == tagname : parent_done
        when 'dt', 'dd'
          # A dt element's end tag may be omitted if the dt element is
          # immediately followed by another dt element or a dd element.
          # A dd element's end tag may be omitted if the dd element is
          # immediately followed by another dd element or a dt element,
          # or if there is no more content in the parent element.
          if type == :StartTag
            %w[dt dd].include?(follower)
          elsif tagname == 'dd'
            parent_done
          else
            false
          end
        when 'p'
          # A p element's end tag may be omitted if the p element is
          # immediately followed by an address, blockquote, dl, fieldset,
          # form, h1, h2, h3, h4, h5, h6, hr, menu, ol, p, pre, table,
          # or ul element, or if there is no more content in the parent
          # element.
          if type == :StartTag
            %w[address blockquote dl fieldset form h1 h2 h3 h4 h5
               h6 hr menu ol p pre table ul].include?(follower)
          else
            parent_done
          end
        when 'colgroup'
          # A colgroup element's end tag may be omitted if the colgroup
          # element is not immediately followed by a space character or
          # a comment.
          if [:Comment, :SpaceCharacters].include?(type)
            false
          elsif type == :StartTag
            # XXX: we also look for an immediately following colgroup
            # element. See is_optional_start.
            follower != 'colgroup'
          else
            true
          end
        when 'thead', 'tbody'
          # A thead element's end tag may be omitted if the thead element
          # is immediately followed by a tbody or tfoot element.
          # A tbody element's end tag may be omitted if the tbody element
          # is immediately followed by a tbody or tfoot element, or if
          # there is no more content in the parent element.
          # Omitting before a tbody is safe because is_optional_start keeps
          # a tbody start tag that directly follows one of these end tags.
          if type == :StartTag
            %w[tbody tfoot].include?(follower)
          elsif tagname == 'tbody'
            parent_done
          else
            false
          end
        when 'tfoot'
          # A tfoot element's end tag may be omitted if the tfoot element
          # is immediately followed by a tbody element, or if there is no
          # more content in the parent element.
          # Omitting before a tbody is safe because is_optional_start keeps
          # a tbody start tag that directly follows a tfoot end tag.
          type == :StartTag ? follower == 'tbody' : parent_done
        when 'td', 'th'
          # A td element's end tag may be omitted if the td element is
          # immediately followed by a td or th element, or if there is
          # no more content in the parent element.
          # A th element's end tag may be omitted if the th element is
          # immediately followed by a td or th element, or if there is
          # no more content in the parent element.
          type == :StartTag ? %w[td th].include?(follower) : parent_done
        else
          false
        end
      end
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'html5/filters/base'
|
2
|
+
require 'html5/sanitizer'
|
3
|
+
|
4
|
+
module HTML5
  module Filters
    # Filter that hands every token of the wrapped stream to sanitize_token
    # (mixed in from HTMLSanitizeModule) and yields whatever that returns.
    class HTMLSanitizeFilter < Base
      include HTMLSanitizeModule

      # Yields the sanitized counterpart of each upstream token, in order.
      def each
        __getobj__.each do |token|
          cleaned = sanitize_token(token)
          yield cleaned
        end
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'html5/constants'
|
2
|
+
require 'html5/filters/base'
|
3
|
+
|
4
|
+
module HTML5
  module Filters
    # Token-stream filter that collapses runs of whitespace to a single
    # space, except inside elements whose whitespace must be preserved.
    class WhitespaceFilter < Base

      # Elements inside which whitespace is left untouched.
      SPACE_PRESERVE_ELEMENTS = %w[pre textarea] + RCDATA_ELEMENTS
      # Matches one run of the characters listed in SPACE_CHARACTERS.
      SPACES = /[#{SPACE_CHARACTERS.join('')}]+/m

      # Yields every upstream token; the data of :SpaceCharacters and
      # :Characters tokens is rewritten in place while no
      # whitespace-preserving element is open.
      def each
        # Nesting depth inside a whitespace-preserving element: once such an
        # element is open every start tag deepens it and every end tag
        # unwinds it, so 0 means "outside".
        preserve = 0
        __getobj__.each do |token|
          case token[:type]
          when :StartTag
            if preserve > 0 || SPACE_PRESERVE_ELEMENTS.include?(token[:name])
              preserve += 1
            end

          when :EndTag
            preserve -= 1 if preserve > 0

          when :SpaceCharacters
            # Only non-empty data becomes a single space; an empty string is
            # truthy in Ruby and must be tested for explicitly.
            if preserve == 0 && token[:data] && !token[:data].empty?
              token[:data] = ' '
            end

          when :Characters
            # gsub, not sub: every whitespace run in the text is collapsed,
            # not just the first one.
            token[:data] = token[:data].gsub(SPACES, ' ') if preserve == 0
          end

          yield token
        end
      end
    end
  end
end
|
36
|
+
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'html5/html5parser/phase'
|
2
|
+
|
3
|
+
module HTML5
  # Parser phase named "after body" in its own error messages. Apart from
  # comments and </html>, every token is reported as a parse error and then
  # reprocessed by the inBody phase.
  class AfterBodyPhase < Phase

    handle_end 'html'

    # Inserts the comment with the first open element as its parent: the
    # data is to be appended to the <html> element here, not to whatever is
    # currently open.
    def processComment(data)
      html_element = @tree.open_elements.first
      @tree.insert_comment(data, html_element)
    end

    # Reports the stray characters, then lets the inBody phase handle them.
    def processCharacters(data)
      parse_error(_('Unexpected non-space characters in the after body phase.'))
      resume_in_body
      @parser.phase.processCharacters(data)
    end

    # Reports the stray start tag, then lets the inBody phase handle it.
    def processStartTag(name, attributes)
      parse_error(_("Unexpected start tag token (#{name}) in the after body phase."))
      resume_in_body
      @parser.phase.processStartTag(name, attributes)
    end

    # </html>: a parse error when parsing inner HTML; otherwise the current
    # phase is remembered as last_phase and the parser moves on to the
    # trailingEnd phase.
    def endTagHtml(name)
      return parse_error if @parser.inner_html

      # XXX: This may need to be done, not sure
      # Don't set last_phase to the current phase but to the inBody phase
      # instead. No need for extra parse errors if there's something after </html>.
      # Try "<!doctype html>X</html>X" for instance.
      @parser.last_phase = @parser.phase
      @parser.phase = @parser.phases[:trailingEnd]
    end

    # Reports the stray end tag, then lets the inBody phase handle it.
    def endTagOther(name)
      parse_error(_("Unexpected end tag token (#{name}) in the after body phase."))
      resume_in_body
      @parser.phase.processEndTag(name)
    end

    private

    # Makes inBody the parser's current phase.
    def resume_in_body
      @parser.phase = @parser.phases[:inBody]
    end

  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'html5/html5parser/phase'
|
2
|
+
|
3
|
+
module HTML5
  # Parser phase named "after frameset" in its own error messages. Only
  # <noframes> and </html> have an effect here; characters and all other
  # tags are reported as parse errors and ignored.
  class AfterFramesetPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#after3

    handle_start 'html', 'noframes'

    handle_end 'html'

    # Non-space characters are reported and dropped.
    def processCharacters(data)
      message = _('Unexpected non-space characters in the after frameset phase. Ignored.')
      parse_error(message)
    end

    # <noframes> is handed to the inBody phase; the current phase is left
    # unchanged.
    def startTagNoframes(name, attributes)
      in_body = @parser.phases[:inBody]
      in_body.processStartTag(name, attributes)
    end

    # Any other start tag is reported and dropped.
    def startTagOther(name, attributes)
      message = _("Unexpected start tag (#{name}) in the after frameset phase. Ignored.")
      parse_error(message)
    end

    # </html>: remember the current phase as last_phase and move on to the
    # trailingEnd phase.
    def endTagHtml(name)
      finished_phase = @parser.phase
      @parser.last_phase = finished_phase
      @parser.phase = @parser.phases[:trailingEnd]
    end

    # Any other end tag is reported and dropped.
    def endTagOther(name)
      message = _("Unexpected end tag (#{name}) in the after frameset phase. Ignored.")
      parse_error(message)
    end

  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'html5/html5parser/phase'
|
2
|
+
|
3
|
+
module HTML5
  # Parser phase that opens either a body or a frameset. Anything other
  # than <body>, <frameset> or a head-only element implies a <body> start
  # tag and is then reprocessed by the phase that follows.
  class AfterHeadPhase < Phase

    handle_start 'html', 'body', 'frameset',
                 %w[base link meta script style title] => 'FromHead'

    # End of file implies <body>; the new phase then sees the EOF.
    def process_eof
      anythingElse
      @parser.phase.process_eof
    end

    # Characters imply <body>; the new phase then sees the characters.
    def processCharacters(data)
      anythingElse
      @parser.phase.processCharacters(data)
    end

    # <body>: insert the element and continue in the inBody phase.
    def startTagBody(name, attributes)
      insert_and_enter(name, attributes, :inBody)
    end

    # <frameset>: insert the element and continue in the inFrameset phase.
    def startTagFrameset(name, attributes)
      insert_and_enter(name, attributes, :inFrameset)
    end

    # A head-only element: report it, switch to the inHead phase and let
    # that phase process the tag.
    def startTagFromHead(name, attributes)
      parse_error(_("Unexpected start tag (#{name}) that can be in head. Moved."))
      @parser.phase = @parser.phases[:inHead]
      @parser.phase.processStartTag(name, attributes)
    end

    # Any other start tag implies <body> and is reprocessed.
    def startTagOther(name, attributes)
      anythingElse
      @parser.phase.processStartTag(name, attributes)
    end

    # Every end tag implies <body> and is reprocessed.
    def processEndTag(name)
      anythingElse
      @parser.phase.processEndTag(name)
    end

    # Inserts an attribute-less body element and switches to the inBody
    # phase.
    def anythingElse
      insert_and_enter('body', {}, :inBody)
    end

    private

    # Inserts the element into the tree and makes the phase registered under
    # phase_key the parser's current phase.
    def insert_and_enter(name, attributes, phase_key)
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[phase_key]
    end

  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'html5/html5parser/phase'
|
2
|
+
|
3
|
+
module HTML5
  # Parser phase that opens the head element. An explicit <head> is inserted
  # as given; most other input implies an attribute-less <head> and is then
  # reprocessed by the phase that follows.
  class BeforeHeadPhase < Phase

    handle_start 'html', 'head'

    handle_end %w[html head body br p] => 'ImplyHead'

    # End of file implies <head>; the new phase then sees the EOF.
    def process_eof
      imply_head
      @parser.phase.process_eof
    end

    # Characters imply <head>; the new phase then sees the characters.
    def processCharacters(data)
      imply_head
      @parser.phase.processCharacters(data)
    end

    # Inserts the head element, records the last open element as the tree's
    # head pointer and switches to the inHead phase.
    def startTagHead(name, attributes)
      @tree.insert_element(name, attributes)
      newest_element = @tree.open_elements[-1]
      @tree.head_pointer = newest_element
      @parser.phase = @parser.phases[:inHead]
    end

    # Any other start tag implies <head> and is reprocessed.
    def startTagOther(name, attributes)
      imply_head
      @parser.phase.processStartTag(name, attributes)
    end

    # </html>, </head>, </body>, </br> and </p> imply <head> and are
    # reprocessed.
    def endTagImplyHead(name)
      imply_head
      @parser.phase.processEndTag(name)
    end

    # Every other end tag is reported as a parse error and dropped.
    def endTagOther(name)
      message = _("Unexpected end tag (#{name}) after the (implied) root element.")
      parse_error(message)
    end

    private

    # Acts as if an attribute-less <head> start tag had been seen.
    def imply_head
      startTagHead('head', {})
    end

  end
end
|