spk-html5 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +10 -0
- data/Manifest.txt +73 -0
- data/README +45 -0
- data/Rakefile.rb +33 -0
- data/bin/html5 +7 -0
- data/lib/html5.rb +13 -0
- data/lib/html5/cli.rb +248 -0
- data/lib/html5/constants.rb +1061 -0
- data/lib/html5/filters/base.rb +10 -0
- data/lib/html5/filters/inject_meta_charset.rb +82 -0
- data/lib/html5/filters/iso639codes.rb +755 -0
- data/lib/html5/filters/optionaltags.rb +198 -0
- data/lib/html5/filters/rfc2046.rb +31 -0
- data/lib/html5/filters/rfc3987.rb +91 -0
- data/lib/html5/filters/sanitizer.rb +15 -0
- data/lib/html5/filters/validator.rb +834 -0
- data/lib/html5/filters/whitespace.rb +36 -0
- data/lib/html5/html5parser.rb +247 -0
- data/lib/html5/html5parser/after_after_body_phase.rb +43 -0
- data/lib/html5/html5parser/after_after_frameset_phase.rb +32 -0
- data/lib/html5/html5parser/after_body_phase.rb +46 -0
- data/lib/html5/html5parser/after_frameset_phase.rb +33 -0
- data/lib/html5/html5parser/after_head_phase.rb +55 -0
- data/lib/html5/html5parser/before_head_phase.rb +44 -0
- data/lib/html5/html5parser/before_html_phase.rb +41 -0
- data/lib/html5/html5parser/in_body_phase.rb +636 -0
- data/lib/html5/html5parser/in_caption_phase.rb +69 -0
- data/lib/html5/html5parser/in_cell_phase.rb +78 -0
- data/lib/html5/html5parser/in_column_group_phase.rb +55 -0
- data/lib/html5/html5parser/in_foreign_content_phase.rb +50 -0
- data/lib/html5/html5parser/in_frameset_phase.rb +56 -0
- data/lib/html5/html5parser/in_head_phase.rb +143 -0
- data/lib/html5/html5parser/in_row_phase.rb +96 -0
- data/lib/html5/html5parser/in_select_phase.rb +90 -0
- data/lib/html5/html5parser/in_select_table_phase.rb +35 -0
- data/lib/html5/html5parser/in_table_body_phase.rb +92 -0
- data/lib/html5/html5parser/in_table_phase.rb +177 -0
- data/lib/html5/html5parser/initial_phase.rb +133 -0
- data/lib/html5/html5parser/phase.rb +171 -0
- data/lib/html5/inputstream.rb +735 -0
- data/lib/html5/liberalxmlparser.rb +158 -0
- data/lib/html5/sanitizer.rb +209 -0
- data/lib/html5/serializer.rb +2 -0
- data/lib/html5/serializer/htmlserializer.rb +179 -0
- data/lib/html5/serializer/xhtmlserializer.rb +20 -0
- data/lib/html5/sniffer.rb +45 -0
- data/lib/html5/tokenizer.rb +1059 -0
- data/lib/html5/treebuilders.rb +24 -0
- data/lib/html5/treebuilders/base.rb +339 -0
- data/lib/html5/treebuilders/hpricot.rb +231 -0
- data/lib/html5/treebuilders/rexml.rb +215 -0
- data/lib/html5/treebuilders/simpletree.rb +191 -0
- data/lib/html5/treewalkers.rb +26 -0
- data/lib/html5/treewalkers/base.rb +162 -0
- data/lib/html5/treewalkers/hpricot.rb +48 -0
- data/lib/html5/treewalkers/rexml.rb +48 -0
- data/lib/html5/treewalkers/simpletree.rb +48 -0
- data/lib/html5/version.rb +3 -0
- data/test/preamble.rb +69 -0
- data/test/test_cli.rb +16 -0
- data/test/test_encoding.rb +35 -0
- data/test/test_input_stream.rb +26 -0
- data/test/test_lxp.rb +283 -0
- data/test/test_parser.rb +63 -0
- data/test/test_sanitizer.rb +173 -0
- data/test/test_serializer.rb +67 -0
- data/test/test_sniffer.rb +27 -0
- data/test/test_stream.rb +71 -0
- data/test/test_tokenizer.rb +95 -0
- data/test/test_treewalkers.rb +135 -0
- data/test/test_validator.rb +31 -0
- data/test/tokenizer_test_parser.rb +67 -0
- data/test19.rb +38 -0
- metadata +198 -0
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'html5/html5parser/phase'
|
2
|
+
|
3
|
+
module HTML5
  # Parser phase used before any <head> element has been inserted.
  #
  # Whitespace is dropped and unrecognised end tags are reported. Every other
  # token handled here first inserts a <head> element (which also switches
  # the parser to the :inHead phase) and is then replayed on that new phase.
  class BeforeHeadPhase < Phase

    handle_start 'html', 'head'

    handle_end %w( head br ) => 'ImplyHead'

    # End of input: insert the implied <head>, then let the phase that is
    # now current deal with EOF.
    def process_eof
      startTagHead('head', {})
      @parser.phase.process_eof
    end

    # Whitespace is ignored in this phase.
    def processSpaceCharacters(data)
    end

    # Character data implies <head>; the data is forwarded to the new phase.
    def processCharacters(data)
      startTagHead('head', {})
      @parser.phase.processCharacters(data)
    end

    # Inserts the element, records the freshly opened element as the tree's
    # head pointer and switches the parser to the :inHead phase.
    def startTagHead(name, attributes)
      @tree.insert_element(name, attributes)
      @tree.head_pointer = @tree.open_elements.last
      @parser.phase = @parser.phases[:inHead]
    end

    # Any other start tag implies <head>; the original tag is replayed on
    # the phase that is current afterwards.
    def startTagOther(name, attributes)
      startTagHead('head', {})
      @parser.phase.processStartTag(name, attributes)
    end

    # </head> and </br> (see handle_end above) imply <head> as well; the end
    # tag is replayed on the phase that is current afterwards.
    def endTagImplyHead(name)
      startTagHead('head', {})
      @parser.phase.processEndTag(name)
    end

    # Every other end tag is only reported as a parse error.
    def endTagOther(name)
      parse_error("end-tag-after-implied-root", {"name" => name})
    end

  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'html5/html5parser/phase'
|
2
|
+
|
3
|
+
module HTML5
  # Parser phase used before the root <html> element exists.
  #
  # Comments are attached to the document and whitespace is dropped. Any
  # other token creates the root element, switches the parser to the
  # :beforeHead phase and is replayed there.
  class BeforeHtmlPhase < Phase

    # End of input: create the root element, then let the new phase see EOF.
    def process_eof
      insert_html_element
      @parser.phase.process_eof
    end

    # Comments seen here become children of the document itself.
    def processComment(data)
      @tree.insert_comment(data, @tree.document)
    end

    # Whitespace is ignored in this phase.
    def processSpaceCharacters(data)
    end

    # Character data implies the root element; the data is then forwarded.
    def processCharacters(data)
      insert_html_element
      @parser.phase.processCharacters(data)
    end

    # A start tag implies the root element. When the tag itself is <html>
    # the parser's first_start_tag flag is raised before it is replayed.
    #
    # NOTE(review): self_closing is accepted but not forwarded to the next
    # phase - confirm against Phase#processStartTag whether that is intended.
    def processStartTag(name, attributes, self_closing=false)
      @parser.first_start_tag = true if name == 'html'
      insert_html_element
      @parser.phase.processStartTag(name, attributes)
    end

    # An end tag implies the root element and is then replayed.
    def processEndTag(name)
      insert_html_element
      @parser.phase.processEndTag(name)
    end

    # Creates an attribute-less <html> element, pushes it on the stack of
    # open elements, appends it to the document and moves the parser to the
    # :beforeHead phase.
    def insert_html_element
      root = @tree.createElement('html', {})
      @tree.open_elements.push(root)
      @tree.document.appendChild(root)
      @parser.phase = @parser.phases[:beforeHead]
    end
  end
end
|
@@ -0,0 +1,636 @@
|
|
1
|
+
require 'html5/html5parser/phase'
|
2
|
+
|
3
|
+
module HTML5
|
4
|
+
class InBodyPhase < Phase
|
5
|
+
|
6
|
+
# http://www.whatwg.org/specs/web-apps/current-work/#in-body
#
# Handler table for this phase. A bare tag name or array of names appears
# to map onto the method named after it (e.g. 'body' => startTagBody,
# %w[applet marquee object] => startTagAppletMarqueeObject); a Hash entry
# names the handler suffix explicitly. NOTE(review): the exact naming rule
# lives in handle_start / handle_end, which are defined outside this file.

# --- start tags ---------------------------------------------------------
handle_start 'html'
handle_start %w[base link meta script style title] => 'ProcessInHead'

handle_start 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'

handle_start 'input', 'textarea', 'select', 'isindex', %w[applet marquee object]

handle_start %w[li dd dt] => 'ListItem'

handle_start %w[address blockquote center dir div dl fieldset listing menu ol p pre ul] => 'CloseP'

handle_start %w[b big em font i s small strike strong tt u] => 'Formatting'
handle_start 'nobr'

handle_start %w[area basefont bgsound br embed img param spacer wbr] => 'VoidFormatting'

handle_start %w[iframe noembed noframes noscript] => 'Cdata', HEADING_ELEMENTS => 'Heading'

handle_start %w[caption col colgroup frame frameset head tbody td tfoot th thead tr] => 'Misplaced'

handle_start %w[option optgroup]

handle_start %w[event-source section nav article aside header footer datagrid command] => 'New'

handle_start %w[math] => 'ForeignContent'

# --- end tags -----------------------------------------------------------
handle_end 'p', 'body', 'html', 'form', %w[applet button marquee object], %w[dd dt li] => 'ListItem'

handle_end %w[address blockquote center div dl fieldset listing menu ol pre ul] => 'Block'

handle_end HEADING_ELEMENTS => 'Heading'

handle_end %w[a b big em font i nobr s small strike strong tt u] => 'Formatting'

handle_end %w[head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th] => 'Misplaced'

handle_end 'br'

handle_end %w[area basefont bgsound embed hr image img input isindex param spacer wbr frame] => 'None'

handle_end %w[noframes noscript noembed textarea xmp iframe] => 'CdataTextAreaXmp'

handle_end %w[event-source section nav article aside header footer datagrid command] => 'New'
|
51
|
+
|
52
|
+
# Sets up the phase and, on this instance's singleton class, keeps the
# stock whitespace handler reachable as processSpaceCharactersNonPre so
# that the <pre>/<listing>/<textarea> handling can swap
# processSpaceCharacters out and later restore it.
def initialize(parser, tree)
  super

  # for special handling of whitespace in <pre>
  class << self
    alias_method :processSpaceCharactersNonPre, :processSpaceCharacters
  end
end
|
60
|
+
|
61
|
+
# One-shot replacement for processSpaceCharacters, installed right after a
# <pre>, <listing> or <textarea> start tag.
#
# It first restores the regular handler, then drops a single leading
# newline when the current node is one of those elements and has no content
# yet. Whatever text remains (if any) is inserted like ordinary whitespace.
def processSpaceCharactersDropNewline(data)
  # Put the normal handler back so only the first run of whitespace after
  # the start tag is treated specially.
  class << self
    remove_method :processSpaceCharacters rescue nil
    alias processSpaceCharacters processSpaceCharactersNonPre
  end

  if data.start_with?("\n")
    current = @tree.open_elements.last
    if %w[listing pre textarea].include?(current.name) && !current.hasContent
      data = data[1..-1]
    end
  end

  unless data.empty?
    @tree.reconstructActiveFormattingElements
    @tree.insertText(data)
  end
end
|
79
|
+
|
80
|
+
# Default whitespace handler: reconstructs the active formatting elements
# and then inserts the whitespace as text.
def processSpaceCharacters(data)
  @tree.reconstructActiveFormattingElements
  @tree.insertText(data)
end
|
84
|
+
|
85
|
+
# Handles non-whitespace character data: reconstructs the active formatting
# elements, then inserts the text.
#
# XXX The specification asks for the reconstruction on every character;
# the original authors noted that this does not match real-world behaviour
# for space characters.
def processCharacters(data)
  @tree.reconstructActiveFormattingElements
  @tree.insertText(data)
end
|
92
|
+
|
93
|
+
# Start tags listed under 'ProcessInHead' are delegated unchanged to the
# :inHead phase.
def startTagProcessInHead(name, attributes)
  in_head = @parser.phases[:inHead]
  in_head.processStartTag(name, attributes)
end
|
96
|
+
|
97
|
+
# A <body> start tag inside the body is always a parse error.
#
# When the second open element is a body element, attributes from the
# stray tag that the element does not have yet are copied onto it; existing
# attributes win. Otherwise the parser is asserted to be in inner_html mode.
def startTagBody(name, attributes)
  parse_error("unexpected-start-tag", {"name" => "body"})

  body = @tree.open_elements[1]
  if @tree.open_elements.length == 1 || body.name != 'body'
    assert @parser.inner_html
  else
    attributes.each do |attr, value|
      body.attributes[attr] = value unless body.attributes.has_key?(attr)
    end
  end
end
|
110
|
+
|
111
|
+
# Block-level start tags: close a <p> that is in scope, then insert the
# element. For <pre> and <listing> the whitespace handler is additionally
# replaced by processSpaceCharactersDropNewline.
def startTagCloseP(name, attributes)
  endTagP('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  return unless %w[pre listing].include?(name)

  class << self
    remove_method :processSpaceCharacters rescue nil
    alias processSpaceCharacters processSpaceCharactersDropNewline
  end
end
|
121
|
+
|
122
|
+
# <form> start tag. While the tree already has a form pointer the tag is
# only reported as a parse error. Otherwise a <p> in scope is closed, the
# element is inserted and becomes the form pointer.
def startTagForm(name, attributes)
  return parse_error("unexpected-start-tag", {"name" => name}) if @tree.formPointer

  endTagP('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  @tree.formPointer = @tree.open_elements.last
end
|
131
|
+
|
132
|
+
# Start tag for <li>, <dd> or <dt>.
#
# Closes a <p> in scope, then walks the stack of open elements from the
# top. When an element that the new item implicitly ends is found (an <li>
# for 'li', a <dd>/<dt> for 'dd' and 'dt'), everything from the top of the
# stack down to and including it is popped. If other elements had to be
# closed on the way, a parse error naming *those* elements is reported; the
# matched list item itself is not missing an end tag in that sense and is
# therefore left out of the message. The walk stops early at any special
# or scoping element other than <address> and <div>.
#
# Finally the requested element is inserted.
def startTagListItem(name, attributes)
  endTagP('p') if in_scope?('p')
  stop_names = {'li' => ['li'], 'dd' => ['dd', 'dt'], 'dt' => ['dd', 'dt']}[name]

  # reverse yields a copy, so popping from open_elements below is safe.
  @tree.open_elements.reverse.each_with_index do |node, depth|
    if stop_names.include?(node.name)
      popped = (0..depth).map { @tree.open_elements.pop }
      if depth >= 1
        # The last popped node is the matched list item; only the
        # elements above it were closed implicitly.
        implied = popped[0...-1].map { |n| n.name }.join(", ")
        parse_error(depth == 1 ? "missing-end-tag" : "missing-end-tags",
                    {"name" => implied})
      end
      break
    end

    # Phrasing elements are all non special, non scoping, non
    # formatting elements
    break if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) &&
             !%w[address div].include?(node.name)
  end

  # Always insert the new list item element.
  @tree.insert_element(name, attributes)
end
|
157
|
+
|
158
|
+
# <plaintext>: close a <p> in scope, insert the element and put the
# tokenizer's content model flag to :PLAINTEXT.
def startTagPlaintext(name, attributes)
  endTagP('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :PLAINTEXT
end
|
163
|
+
|
164
|
+
# Heading start tag: close a <p> in scope and insert the heading.
#
# An open heading is deliberately NOT closed here. The disabled snippet
# below reproduces the IE7 behaviour and can be re-enabled if wanted.
def startTagHeading(name, attributes)
  endTagP('p') if in_scope?('p')

  # Uncomment the following for IE7 behavior:
  # HEADING_ELEMENTS.each do |element|
  #   if in_scope?(element)
  #     parse_error("unexpected-start-tag", {"name" => name})
  #
  #     remove_open_elements_until do |element|
  #       HEADING_ELEMENTS.include?(element.name)
  #     end
  #
  #     break
  #   end
  # end

  @tree.insert_element(name, attributes)
end
|
181
|
+
|
182
|
+
# <a> start tag. An <a> still present in the list of active formatting
# elements is a parse error: it is ended as if </a> had been seen and any
# remaining reference to it is removed from both the stack of open elements
# and the active formatting list. The new anchor is then added as a
# formatting element.
def startTagA(name, attributes)
  stale_anchor = @tree.elementInActiveFormattingElements('a')
  if stale_anchor
    parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "a", "endName" => "a"})
    endTagFormatting('a')

    open = @tree.open_elements
    open.delete(stale_anchor) if open.include?(stale_anchor)

    active = @tree.activeFormattingElements
    active.delete(stale_anchor) if active.include?(stale_anchor)
  end

  @tree.reconstructActiveFormattingElements
  addFormattingElement(name, attributes)
end
|
192
|
+
|
193
|
+
# Formatting start tags (b, i, em, ...): reconstruct the active formatting
# elements and register the new element as a formatting element.
def startTagFormatting(name, attributes)
  @tree.reconstructActiveFormattingElements
  addFormattingElement(name, attributes)
end
|
197
|
+
|
198
|
+
# <nobr> start tag. A <nobr> already in scope is a parse error and is
# closed first (via processEndTag), after which the active formatting
# elements are reconstructed once more. The new element is then added as a
# formatting element.
def startTagNobr(name, attributes)
  @tree.reconstructActiveFormattingElements

  if in_scope?('nobr')
    parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "nobr", "endName" => "nobr"})
    processEndTag('nobr')
    # XXX Need tests that trigger the following
    @tree.reconstructActiveFormattingElements
  end

  addFormattingElement(name, attributes)
end
|
208
|
+
|
209
|
+
# <button> start tag.
#
# Normally the element is inserted after reconstructing the active
# formatting elements, and a Marker is pushed onto the active formatting
# list. If a <button> is already in scope, that is a parse error: the open
# button is ended first and the start tag is replayed on the current phase.
def startTagButton(name, attributes)
  unless in_scope?('button')
    @tree.reconstructActiveFormattingElements
    @tree.insert_element(name, attributes)
    return @tree.activeFormattingElements.push(Marker)
  end

  parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "button", "endName" => "button"})
  processEndTag('button')
  @parser.phase.processStartTag(name, attributes)
end
|
220
|
+
|
221
|
+
# <applet>, <marquee>, <object>: reconstruct the active formatting
# elements, insert the element and push a Marker onto the active
# formatting list.
def startTagAppletMarqueeObject(name, attributes)
  @tree.reconstructActiveFormattingElements
  @tree.insert_element(name, attributes)
  active = @tree.activeFormattingElements
  active.push(Marker)
end
|
226
|
+
|
227
|
+
# <xmp>: reconstruct the active formatting elements, insert the element and
# switch the tokenizer's content model flag to :CDATA.
def startTagXmp(name, attributes)
  @tree.reconstructActiveFormattingElements
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :CDATA
end
|
232
|
+
|
233
|
+
# <table>: end a <p> in scope (through processEndTag), insert the table and
# switch the parser to the :inTable phase.
def startTagTable(name, attributes)
  processEndTag('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  in_table = @parser.phases[:inTable]
  @parser.phase = in_table
end
|
238
|
+
|
239
|
+
# Void elements such as <br> or <img>: reconstruct the active formatting
# elements, insert the element and pop it straight off the stack of open
# elements again.
def startTagVoidFormatting(name, attributes)
  @tree.reconstructActiveFormattingElements
  @tree.insert_element(name, attributes)
  open = @tree.open_elements
  open.pop
end
|
244
|
+
|
245
|
+
# <hr>: close a <p> in scope, insert the element and pop it immediately.
def startTagHr(name, attributes)
  endTagP('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  open = @tree.open_elements
  open.pop
end
|
250
|
+
|
251
|
+
# <image> is reported as a parse error and then handled exactly as if an
# <img> start tag with the same attributes had been seen.
def startTagImage(name, attributes)
  # No really...
  parse_error("unexpected-start-tag-treated-as", {"originalName" => "image", "newName" => "img"})
  processStartTag('img', attributes)
end
|
256
|
+
|
257
|
+
# <input>: reconstruct the active formatting elements, insert the element
# and pop it again. Associating the element with the tree's form pointer
# is not implemented (see the XXX note).
def startTagInput(name, attributes)
  @tree.reconstructActiveFormattingElements
  @tree.insert_element(name, attributes)

  if @tree.formPointer
    # XXX Not exactly sure what to do here
    # @tree.open_elements[-1].form = @tree.formPointer
  end

  @tree.open_elements.pop
end
|
266
|
+
|
267
|
+
# <isindex> start tag (deprecated; always a parse error).
#
# Ignored when the tree already has a form pointer. Otherwise the tag is
# expanded into the equivalent token sequence
#   <form><hr><p><label>prompt text <input name="isindex" ...></label></p><hr></form>
# by feeding each token back through this phase.
#
# The <input> receives the isindex tag's attributes plus name="isindex".
# The caller's attributes hash is left untouched: a merged copy is passed on.
def startTagIsindex(name, attributes)
  parse_error("deprecated-tag", {"name" => "isindex"})
  return if @tree.formPointer

  processStartTag('form', {})
  processStartTag('hr', {})
  processStartTag('p', {})
  processStartTag('label', {})
  # XXX Localization ...
  processCharacters('This is a searchable index. Insert your search keywords here: ')
  processStartTag('input', attributes.merge('name' => 'isindex'))
  processEndTag('label')
  processEndTag('p')
  processStartTag('hr', {})
  processEndTag('form')
end
|
284
|
+
|
285
|
+
# <textarea>: insert the element, switch the tokenizer's content model flag
# to :RCDATA and install the one-shot whitespace handler that drops a
# leading newline.
def startTagTextarea(name, attributes)
  # XXX Form element pointer checking here as well...
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :RCDATA

  class << self
    remove_method :processSpaceCharacters rescue nil
    alias processSpaceCharacters processSpaceCharactersDropNewline
  end
end
|
294
|
+
|
295
|
+
# iframe, noembed noframes, noscript(if scripting enabled)
#
# Inserts the element and switches the tokenizer's content model flag to
# :CDATA.
def startTagCdata(name, attributes)
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :CDATA
end
|
300
|
+
|
301
|
+
# <select>: reconstruct the active formatting elements and insert the
# element. The parser then moves to :inSelectInTable when its current phase
# is one of the table-related phases, and to :inSelect otherwise.
def startTagSelect(name, attributes)
  @tree.reconstructActiveFormattingElements
  @tree.insert_element(name, attributes)

  table_keys = [:inTable, :inCaption, :inColumnGroup, :inTableBody, :inRow, :inCell]
  table_phases = table_keys.map { |key| @parser.phases[key] }
  target = table_phases.include?(@parser.phase) ? :inSelectInTable : :inSelect
  @parser.phase = @parser.phases[target]
end
|
313
|
+
|
314
|
+
# Start tags that belong to another insertion mode ("caption", "col",
# "colgroup", "frame", "frameset", "head", "tbody", "td", "tfoot", "th",
# "thead", "tr") are ignored here apart from being reported as a parse
# error.
def startTagMisplaced(name, attributes)
  details = {"name" => name}
  parse_error("unexpected-start-tag-ignored", details)
end
|
322
|
+
|
323
|
+
# <option> / <optgroup>: an <option> in scope is closed through
# endTagOther first; then the active formatting elements are reconstructed
# and the new element is inserted.
def startTagOptionOptgroup(name, attributes)
  endTagOther('option') if in_scope?('option')
  @tree.reconstructActiveFormattingElements
  @tree.insert_element(name, attributes)
end
|
330
|
+
|
331
|
+
# New HTML5 elements ("event-source", "section", "nav", "article", "aside",
# "header", "footer", "datagrid", "command") have no dedicated handling
# yet; they are treated like any other start tag.
def startTagNew(name, attributes)
  # $stderr.puts("Warning: Undefined behaviour for start tag #{name}")
  # raise NotImplementedError
  startTagOther(name, attributes)
end
|
338
|
+
|
339
|
+
# Fallback for any start tag without a dedicated handler: reconstruct the
# active formatting elements, then insert the element.
def startTagOther(name, attributes)
  @tree.reconstructActiveFormattingElements
  @tree.insert_element(name, attributes)
end
|
343
|
+
|
344
|
+
# <math> start tag: reconstruct the active formatting elements, run the
# attributes through the MathML and foreign-attribute adjustments, insert
# the element as foreign content in the :math namespace, remember the
# current phase as the secondary phase and switch to :inForeignContent.
def startTagForeignContent(name, attributes)
  @tree.reconstructActiveFormattingElements
  adjusted = adjust_foreign_attributes(adjust_mathml_attributes(attributes))
  @tree.insert_foreign_element(name, adjusted, :math)

  # TODO: If the token has its self-closing flag set, pop the current node
  # off the stack of open elements and acknowledge the token's self-closing
  # flag. That case is not implemented; the phase switch below always runs.
  @parser.secondary_phase = @parser.phase
  @parser.phase = @parser.phases[:inForeignContent]
end
|
357
|
+
|
358
|
+
# </p> end tag.
#
# With a <p> in scope, implied end tags (other than for p) are generated, a
# parse error is reported when the current node is not the <p>, and
# elements are popped until no <p> is in scope any more. Without one, a
# parse error is reported, an empty <p> is opened and the method runs again
# to close it.
def endTagP(name)
  @tree.generateImpliedEndTags('p') if in_scope?('p')

  unless @tree.open_elements.last.name == 'p'
    parse_error("unexpected-end-tag", {"name" => "p"})
  end

  unless in_scope?('p')
    startTagCloseP('p', {})
    return endTagP('p')
  end

  @tree.open_elements.pop while in_scope?('p')
end
|
368
|
+
|
369
|
+
# </body> end tag.
#
# When the second element on the stack of open elements is not a body
# element (the inner_html case) the tag is reported and ignored. Otherwise
# a parse error is reported if the current node is not the body, and the
# parser moves on to the :afterBody phase.
#
# Parse-error data uses String keys ("name", "expectedName", "gotName")
# like every other parse_error call in this class.
#
# XXX Need to take open <p> tags into account here. We shouldn't imply
# </p> but we should not throw a parse error either. Specification is
# likely to be updated.
def endTagBody(name)
  body = @tree.open_elements[1]
  unless body && body.name == 'body'
    # inner_html case
    parse_error("unexpected-end-tag", {"name" => "body"})
    return
  end

  current_name = @tree.open_elements.last.name
  unless current_name == 'body'
    parse_error("expected-one-end-tag-but-got-another",
                {"expectedName" => "body", "gotName" => current_name})
  end

  @parser.phase = @parser.phases[:afterBody]
end
|
385
|
+
|
386
|
+
# </html> is handled like </body>; unless the parser is in inner_html mode
# the end tag is then replayed on whatever phase is current afterwards.
def endTagHtml(name)
  endTagBody(name)
  return if @parser.inner_html

  @parser.phase.processEndTag(name)
end
|
390
|
+
|
391
|
+
# End tag of a block element (div, ul, pre, ...). With the element in
# scope, implied end tags are generated first; a parse error is reported
# when the current node is not the element being closed; finally, if the
# element is in scope, open elements are removed up to and including it.
def endTagBlock(name)
  @tree.generateImpliedEndTags if in_scope?(name)

  current = @tree.open_elements.last
  parse_error("end-tag-too-early", {"name" => name}) if current.name != name

  remove_open_elements_until(name) if in_scope?(name)
end
|
402
|
+
|
403
|
+
# </form> end tag. The tree's form pointer is always cleared. If a form is
# in scope, implied end tags are generated, a parse error is reported when
# the current node is not the form, and elements are popped up to and
# including the form.
#
# NOTE(review): when no form is in scope the original code only carries a
# "parse error" comment and reports nothing; that behaviour is kept here.
def endTagForm(name)
  @tree.formPointer = nil
  return unless in_scope?(name) # parse error (not reported, see note above)

  @tree.generateImpliedEndTags
  unless @tree.open_elements.last.name == name
    parse_error("end-tag-too-early-ignored", {"name" => "form"})
  end

  loop { break if @tree.open_elements.pop.name == name }
end
|
414
|
+
|
415
|
+
# End tag of <dd>, <dt> or <li>. Same shape as the block case, except that
# the implied-end-tag generation is told to exclude the element itself.
# AT Could merge this with the Block case
def endTagListItem(name)
  @tree.generateImpliedEndTags(name) if in_scope?(name)

  current = @tree.open_elements.last
  parse_error("end-tag-too-early", {"name" => name}) if current.name != name

  remove_open_elements_until(name) if in_scope?(name)
end
|
425
|
+
|
426
|
+
# End tag of a heading. If any heading element is in scope, implied end
# tags are generated. A parse error is reported when the current node is
# not the heading being closed. Then, if any heading is in scope, open
# elements are removed up to and including the nearest heading of any level.
def endTagHeading(name)
  @tree.generateImpliedEndTags if HEADING_ELEMENTS.any? { |heading| in_scope?(heading) }

  if @tree.open_elements.last.name != name
    parse_error("end-tag-too-early", {"name" => name})
  end

  if HEADING_ELEMENTS.any? { |heading| in_scope?(heading) }
    remove_open_elements_until { |el| HEADING_ELEMENTS.include?(el.name) }
  end
end
|
445
|
+
|
446
|
+
# The much-feared adoption agency algorithm
# http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
#
# Handles the end tag of a formatting element. Each pass of the outer loop
# looks up the matching entry in the list of active formatting elements and
# either bails out (steps 1 and 3) or re-parents nodes around the "furthest
# block" and replaces the formatting element by a clone (steps 5-13). The
# method only ever leaves through one of the returns in steps 1 and 3.
#
# XXX Better parse_error messages appreciated.
def endTagFormatting(name)
  loop do
    # Step 1 paragraph 1: no such formatting element, or it is open but
    # not in scope.
    formatting = @tree.elementInActiveFormattingElements(name)
    if !formatting || (@tree.open_elements.include?(formatting) && !in_scope?(formatting.name))
      parse_error("adoption-agency-1.1", {"name" => name})
      return
    end

    # Step 1 paragraph 2: listed as active but no longer open.
    unless @tree.open_elements.include?(formatting)
      parse_error("adoption-agency-1.2", {"name" => name})
      @tree.activeFormattingElements.delete(formatting)
      return
    end

    # Step 1 paragraph 3: open and in scope, but not the current node.
    unless formatting == @tree.open_elements.last
      parse_error("adoption-agency-1.3", {"name" => name})
    end

    # Step 2
    # Start of the adoption agency algorithm proper: the furthest block is
    # the first special or scoping element at or after the formatting
    # element on the stack of open elements.
    formatting_index = @tree.open_elements.index(formatting)
    furthest_block = @tree.open_elements[formatting_index..-1].find do |element|
      (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(element.name)
    end

    # Step 3: nothing to adopt - pop up to the formatting element and
    # drop it from the active list.
    if furthest_block.nil?
      popped = remove_open_elements_until { |el| el == formatting }
      @tree.activeFormattingElements.delete(popped)
      return
    end
    common_ancestor = @tree.open_elements[formatting_index - 1]

    # Step 5
    furthest_block.parent.removeChild(furthest_block) if furthest_block.parent

    # Step 6
    # The bookmark is supposed to help us identify where to reinsert
    # nodes in step 12. We have to ensure that we reinsert nodes after
    # the node before the active formatting element. Note the bookmark
    # can move in step 7.4
    bookmark = @tree.activeFormattingElements.index(formatting)

    # Step 7
    last_node = node = furthest_block
    loop do
      # AT replace this with a function and recursion?
      # Node is element before node in open elements
      node = @tree.open_elements[@tree.open_elements.index(node) - 1]
      until @tree.activeFormattingElements.include?(node)
        skipped = node
        node = @tree.open_elements[@tree.open_elements.index(node) - 1]
        @tree.open_elements.delete(skipped)
      end

      # Step 7.3
      break if node == formatting

      # Step 7.4
      # XXX should this be index(node) or index(node)+1
      # Anne: I think +1 is ok. Given x = [2,3,4,5]
      # x.index(3) gives 1 and then x[1 +1] gives 4...
      bookmark = @tree.activeFormattingElements.index(node) + 1 if last_node == furthest_block

      # Step 7.5
      _cite = node.parent # value is not used; the read is kept from the original
      if node.hasContent
        replacement = node.cloneNode
        # Replace node with clone
        @tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = replacement
        @tree.open_elements[@tree.open_elements.index(node)] = replacement
        node = replacement
      end

      # Step 7.6
      # Remove lastNode from its parents, if any
      last_node.parent.removeChild(last_node) if last_node.parent
      node.appendChild(last_node)

      # Step 7.7
      last_node = node
      # End of inner loop
    end

    # Step 8
    last_node.parent.removeChild(last_node) if last_node.parent
    common_ancestor.appendChild(last_node)

    # Step 9
    formatting_clone = formatting.cloneNode

    # Step 10
    furthest_block.reparentChildren(formatting_clone)

    # Step 11
    furthest_block.appendChild(formatting_clone)

    # Step 12
    @tree.activeFormattingElements.delete(formatting)
    insert_at = [bookmark, @tree.activeFormattingElements.length].min
    @tree.activeFormattingElements.insert(insert_at, formatting_clone)

    # Step 13
    @tree.open_elements.delete(formatting)
    @tree.open_elements.insert(@tree.open_elements.index(furthest_block) + 1, formatting_clone)
  end
end
|
557
|
+
|
558
|
+
# End tag of <applet>, <button>, <marquee> or <object>. With the element in
# scope, implied end tags are generated; a parse error is reported when the
# current node is not the element; then, if it is in scope, elements are
# removed up to and including it and the active formatting elements are
# cleared via the tree.
def endTagAppletButtonMarqueeObject(name)
  @tree.generateImpliedEndTags if in_scope?(name)

  current = @tree.open_elements.last
  parse_error("end-tag-too-early", {"name" => name}) if current.name != name

  return unless in_scope?(name)

  remove_open_elements_until(name)
  @tree.clearActiveFormattingElements
end
|
571
|
+
|
572
|
+
# End tags that are only meaningful in other insertion modes are reported
# as a parse error and otherwise ignored.
def endTagMisplaced(name)
  details = {"name" => name}
  parse_error("unexpected-end-tag", details)
end
|
576
|
+
|
577
|
+
# </br> is a parse error that is treated as a <br> element: the active
# formatting elements are reconstructed, an attribute-less element is
# inserted and immediately popped again.
def endTagBr(name)
  details = {"originalName" => "br", "newName" => "br element"}
  parse_error("unexpected-end-tag-treated-as", details)

  @tree.reconstructActiveFormattingElements
  @tree.insert_element(name, {})
  @tree.open_elements.pop
end
|
584
|
+
|
585
|
+
# End tags for elements that never have one (img, input, hr, ...) are
# reported as a parse error and otherwise ignored.
def endTagNone(name)
  details = {"name" => name}
  parse_error("no-end-tag", details)
end
|
589
|
+
|
590
|
+
# End tag of a CDATA/RCDATA-style element (textarea, xmp, iframe, ...): the
# current node is popped when it has the same name; anything else is a
# parse error and leaves the stack untouched.
def endTagCdataTextAreaXmp(name)
  open = @tree.open_elements
  return open.pop if open.last.name == name

  parse_error("unexpected-end-tag", {"name" => name})
end
|
597
|
+
|
598
|
+
# End tags of the new HTML5 elements ("event-source", "section", "nav",
# "article", "aside", "header", "footer", "datagrid", "command") have no
# dedicated handling yet; they are treated like any other end tag.
def endTagNew(name)
  # STDERR.puts "Warning: Undefined behaviour for end tag #{name}"
  # raise NotImplementedError
  endTagOther(name)
end
|
605
|
+
|
606
|
+
# Fallback for any end tag without a dedicated handler.
#
# Walks the stack of open elements from the top. At the first element with
# the same name, implied end tags are generated, a parse error is reported
# if the current node is not that element, and everything up to and
# including it is removed. Hitting a special or scoping element first
# means the tag matches nothing: it is reported and ignored.
#
# XXX This logic should be moved into the treebuilder
def endTagOther(name)
  @tree.open_elements.reverse.each do |node|
    if node.name == name
      @tree.generateImpliedEndTags

      if @tree.open_elements.last.name != name
        parse_error("unexpected-end-tag", {"name" => name})
      end

      remove_open_elements_until { |element| element == node }
      break
    elsif (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name)
      parse_error("unexpected-end-tag", {"name" => name})
      break
    end
  end
end
|
627
|
+
|
628
|
+
protected
|
629
|
+
|
630
|
+
# Inserts the element and records the resulting current node at the end of
# the list of active formatting elements.
def addFormattingElement(name, attributes)
  @tree.insert_element(name, attributes)
  inserted = @tree.open_elements.last
  @tree.activeFormattingElements.push(inserted)
end
|
634
|
+
|
635
|
+
end
|
636
|
+
end
|