html5 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +3 -0
- data/Manifest.txt +58 -0
- data/README +9 -0
- data/Rakefile.rb +17 -0
- data/lib/html5/constants.rb +818 -0
- data/lib/html5/filters/base.rb +10 -0
- data/lib/html5/filters/inject_meta_charset.rb +82 -0
- data/lib/html5/filters/optionaltags.rb +198 -0
- data/lib/html5/filters/sanitizer.rb +15 -0
- data/lib/html5/filters/whitespace.rb +36 -0
- data/lib/html5/html5parser/after_body_phase.rb +46 -0
- data/lib/html5/html5parser/after_frameset_phase.rb +34 -0
- data/lib/html5/html5parser/after_head_phase.rb +50 -0
- data/lib/html5/html5parser/before_head_phase.rb +41 -0
- data/lib/html5/html5parser/in_body_phase.rb +607 -0
- data/lib/html5/html5parser/in_caption_phase.rb +68 -0
- data/lib/html5/html5parser/in_cell_phase.rb +78 -0
- data/lib/html5/html5parser/in_column_group_phase.rb +55 -0
- data/lib/html5/html5parser/in_frameset_phase.rb +57 -0
- data/lib/html5/html5parser/in_head_phase.rb +138 -0
- data/lib/html5/html5parser/in_row_phase.rb +87 -0
- data/lib/html5/html5parser/in_select_phase.rb +84 -0
- data/lib/html5/html5parser/in_table_body_phase.rb +83 -0
- data/lib/html5/html5parser/in_table_phase.rb +110 -0
- data/lib/html5/html5parser/initial_phase.rb +134 -0
- data/lib/html5/html5parser/phase.rb +158 -0
- data/lib/html5/html5parser/root_element_phase.rb +42 -0
- data/lib/html5/html5parser/trailing_end_phase.rb +35 -0
- data/lib/html5/html5parser.rb +248 -0
- data/lib/html5/inputstream.rb +654 -0
- data/lib/html5/liberalxmlparser.rb +158 -0
- data/lib/html5/sanitizer.rb +188 -0
- data/lib/html5/serializer/htmlserializer.rb +180 -0
- data/lib/html5/serializer/xhtmlserializer.rb +20 -0
- data/lib/html5/serializer.rb +2 -0
- data/lib/html5/tokenizer.rb +968 -0
- data/lib/html5/treebuilders/base.rb +334 -0
- data/lib/html5/treebuilders/hpricot.rb +231 -0
- data/lib/html5/treebuilders/rexml.rb +208 -0
- data/lib/html5/treebuilders/simpletree.rb +185 -0
- data/lib/html5/treebuilders.rb +24 -0
- data/lib/html5/treewalkers/base.rb +154 -0
- data/lib/html5/treewalkers/hpricot.rb +48 -0
- data/lib/html5/treewalkers/rexml.rb +48 -0
- data/lib/html5/treewalkers/simpletree.rb +48 -0
- data/lib/html5/treewalkers.rb +26 -0
- data/lib/html5.rb +13 -0
- data/parse.rb +217 -0
- data/tests/preamble.rb +82 -0
- data/tests/test_encoding.rb +35 -0
- data/tests/test_lxp.rb +263 -0
- data/tests/test_parser.rb +68 -0
- data/tests/test_sanitizer.rb +142 -0
- data/tests/test_serializer.rb +68 -0
- data/tests/test_stream.rb +62 -0
- data/tests/test_tokenizer.rb +94 -0
- data/tests/test_treewalkers.rb +116 -0
- data/tests/tokenizer_test_parser.rb +63 -0
- metadata +120 -0
@@ -0,0 +1,607 @@
|
|
1
|
+
require 'html5/html5parser/phase'
|
2
|
+
|
3
|
+
module HTML5
|
4
|
+
class InBodyPhase < Phase
|
5
|
+
|
6
|
+
# http://www.whatwg.org/specs/web-apps/current-work/#in-body
|
7
|
+
|
8
|
+
# Start-tag dispatch table for the "in body" insertion mode.
# NOTE(review): handle_start/handle_end are defined outside this file; judging
# by the handlers below, a bare tag name maps to startTag<Name>/endTag<Name>
# and a `names => 'Suffix'` pair maps to startTagSuffix/endTagSuffix -- confirm
# against Phase.
handle_start 'html'
handle_start %w[base link meta script style] => 'ProcessInHead'
handle_start 'title'

handle_start 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'

handle_start 'input', 'textarea', 'select', 'isindex', %w[marquee object]

handle_start %w[li dd dt] => 'ListItem'

handle_start %w[address blockquote center dir div dl fieldset listing menu ol p pre ul] => 'CloseP'

handle_start %w[b big em font i s small strike strong tt u] => 'Formatting'
handle_start 'nobr'

handle_start %w[area basefont bgsound br embed img param spacer wbr] => 'VoidFormatting'

handle_start %w[iframe noembed noframes noscript] => 'Cdata', HEADING_ELEMENTS => 'Heading'

handle_start %w[caption col colgroup frame frameset head option optgroup tbody td tfoot th thead tr] => 'Misplaced'

handle_start %w[event-source section nav article aside header footer datagrid command] => 'New'

# End-tag dispatch table for the "in body" insertion mode.
handle_end 'p', 'body', 'html', 'form', %w[button marquee object], %w[dd dt li] => 'ListItem'

handle_end %w[address blockquote center div dl fieldset listing menu ol pre ul] => 'Block'

handle_end HEADING_ELEMENTS => 'Heading'

handle_end %w[a b big em font i nobr s small strike strong tt u] => 'Formatting'

handle_end %w[head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th] => 'Misplaced'

handle_end 'br'

handle_end %w[area basefont bgsound embed hr image img input isindex param spacer wbr frame] => 'None'

handle_end %w[noframes noscript noembed textarea xmp iframe] => 'CdataTextAreaXmp'

handle_end %w[event-source section nav article aside header footer datagrid command] => 'New'
|
48
|
+
|
49
|
+
# Sets up the phase and remembers the ordinary space-character handler under
# the name processSpaceCharactersNonPre so it can be restored after a
# temporary swap to the newline-dropping handler.
def initialize(parser, tree)
  super(parser, tree)

  # for special handling of whitespace in <pre>
  @processSpaceCharactersDropNewline = false

  # Silence warnings around the alias, restoring the flag afterwards only
  # when it was switched on to begin with.
  warnings_on = $-w
  $-w = false if warnings_on
  alias processSpaceCharactersNonPre processSpaceCharacters
  $-w = true if warnings_on
end
|
62
|
+
|
63
|
+
# One-shot space-character handler used right after <pre>/<textarea> opens.
# It first reinstates the normal handler, then drops a single leading newline
# when the current node is an empty pre/textarea, and inserts whatever text
# is left. Returns nil when nothing remains to insert.
def processSpaceCharactersDropNewline(data)
  # Put the regular handler back (warnings silenced while aliasing).
  warnings_on = $-w
  $-w = false if warnings_on
  alias processSpaceCharacters processSpaceCharactersNonPre
  $-w = true if warnings_on

  if !data.empty? && data[0] == ?\n
    innermost = @tree.open_elements.last
    if %w[pre textarea].include?(innermost.name) && !innermost.hasContent
      data = data[1..-1]
    end
  end

  return if data.empty?

  @tree.reconstructActiveFormattingElements
  @tree.insertText(data)
end
|
84
|
+
|
85
|
+
# Default handler for whitespace tokens: rebuild the active formatting
# elements, then append the whitespace as text.
def processSpaceCharacters(data)
  @tree.reconstructActiveFormattingElements
  @tree.insertText(data)
end
|
89
|
+
|
90
|
+
# Handler for non-space character tokens: rebuild the active formatting
# elements, then append the text.
def processCharacters(data)
  # XXX The specification says to do this for every character at the
  # moment, but apparently that doesn't match the real world so we don't
  # do it for space characters.
  @tree.reconstructActiveFormattingElements()
  @tree.insertText(data)
end
|
97
|
+
|
98
|
+
# Forwards the start tag unchanged to the :inHead phase.
def startTagProcessInHead(name, attributes)
  head_phase = @parser.phases[:inHead]
  head_phase.processStartTag(name, attributes)
end
|
101
|
+
|
102
|
+
# Reports the misplaced tag as a parse error, then lets the :inHead phase
# process it.
def startTagTitle(name, attributes)
  parse_error(_("Unexpected start tag (#{name}) that belongs in the head. Moved."))
  head_phase = @parser.phases[:inHead]
  head_phase.processStartTag(name, attributes)
end
|
106
|
+
|
107
|
+
# A second <body> start tag is always a parse error. When the second open
# element is a body, attributes it does not already carry are copied onto it;
# otherwise the method only asserts that the parser is in inner_html mode.
def startTagBody(name, attributes)
  parse_error(_('Unexpected start tag (body).'))

  stack = @tree.open_elements
  if stack.length == 1 || stack[1].name != 'body'
    assert @parser.inner_html
  else
    attributes.each do |attr, value|
      existing = @tree.open_elements[1].attributes
      existing[attr] = value unless existing.has_key?(attr)
    end
  end
end
|
120
|
+
|
121
|
+
# Start tag for block-level elements that implicitly close an open <p>.
#
# Closes any <p> in scope and inserts the element. For <pre> it also installs
# processSpaceCharactersDropNewline as the space-character handler, the same
# way startTagTextarea does, so that a newline directly after the start tag
# is dropped. Previously only an instance flag was set for <pre>; nothing
# read it, so the handler (which explicitly checks for 'pre') never ran.
def startTagCloseP(name, attributes)
  endTagP('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  return unless name == 'pre'

  @processSpaceCharactersDropNewline = true

  # Silence warnings around the alias, matching the other alias sites.
  warnings_on = $-w
  $-w = false if warnings_on
  alias processSpaceCharacters processSpaceCharactersDropNewline
  $-w = true if warnings_on
end
|
126
|
+
|
127
|
+
# Opens a <form> unless one is already recorded in the form pointer, in which
# case the tag is reported and ignored. A <p> in scope is closed first, and
# the new element becomes the form pointer.
def startTagForm(name, attributes)
  return parse_error(_('Unexpected start tag (form). Ignored.')) if @tree.formPointer

  endTagP('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  @tree.formPointer = @tree.open_elements[-1]
end
|
136
|
+
|
137
|
+
# Start tag for <li>, <dd> and <dt>.
#
# Closes a <p> in scope, then walks the open elements from innermost outwards
# looking for a sibling item to close (li closes li; dd/dt close dd or dt).
# When found, that item and everything above it is popped, and a parse error
# is raised if more than the item itself had to be popped. The walk stops at
# the first special/scoping element other than address or div. The new item
# is always inserted afterwards.
def startTagListItem(name, attributes)
  endTagP('p') if in_scope?('p')

  closes = { 'li' => ['li'], 'dd' => ['dd', 'dt'], 'dt' => ['dd', 'dt'] }[name]
  barriers = SPECIAL_ELEMENTS + SCOPING_ELEMENTS

  @tree.open_elements.reverse.each_with_index do |node, depth|
    if closes.include?(node.name)
      # pop(n) hands back the removed nodes outermost-first.
      closed = @tree.open_elements.pop(depth + 1)
      if depth >= 1
        plural = depth > 1 ? 's' : ''
        names = closed.map { |item| item.name }.join(', ')
        parse_error(_("Missing end tag%s (%s)" % [plural, names]))
      end
      break
    end

    # Phrasing elements are all non special, non scoping, non
    # formatting elements
    break if barriers.include?(node.name) && !%w[address div].include?(node.name)
  end

  # Always insert an <li> element.
  @tree.insert_element(name, attributes)
end
|
159
|
+
|
160
|
+
# Closes a <p> in scope, inserts the element and switches the tokenizer's
# content model flag to :PLAINTEXT.
def startTagPlaintext(name, attributes)
  endTagP('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :PLAINTEXT
end
|
165
|
+
|
166
|
+
# Start tag for a heading element: close a <p> in scope, then insert it.
def startTagHeading(name, attributes)
  endTagP('p') if in_scope?('p')

  # Uncomment the following for IE7 behavior:
  # HEADING_ELEMENTS.each do |element|
  #   if in_scope?(element)
  #     parse_error(_("Unexpected start tag (#{name})."))
  #
  #     remove_open_elements_until do |element|
  #       HEADING_ELEMENTS.include?(element.name)
  #     end
  #
  #     break
  #   end
  # end

  @tree.insert_element(name, attributes)
end
|
183
|
+
|
184
|
+
# Start tag for <a>. If an <a> is still among the active formatting elements
# it is closed first (a parse error) and scrubbed from both the open-element
# stack and the active formatting list. The new <a> is then added as a
# formatting element.
def startTagA(name, attributes)
  stale_anchor = @tree.elementInActiveFormattingElements('a')
  if stale_anchor
    parse_error(_('Unexpected start tag (a) implies end tag (a).'))
    endTagFormatting('a')
    # Array#delete is a no-op when the element is already gone.
    @tree.open_elements.delete(stale_anchor)
    @tree.activeFormattingElements.delete(stale_anchor)
  end

  @tree.reconstructActiveFormattingElements
  addFormattingElement(name, attributes)
end
|
194
|
+
|
195
|
+
# Start tag for an ordinary formatting element: rebuild the active formatting
# elements, then add the new one.
def startTagFormatting(name, attributes)
  @tree.reconstructActiveFormattingElements()
  addFormattingElement(name, attributes)
end
|
199
|
+
|
200
|
+
# Start tag for <nobr>. A <nobr> already in scope is closed first (a parse
# error), after which the active formatting elements are rebuilt again.
def startTagNobr(name, attributes)
  @tree.reconstructActiveFormattingElements

  if in_scope?('nobr')
    parse_error(_('Unexpected start tag (nobr) implies end tag (nobr).'))
    processEndTag('nobr')
    # XXX Need tests that trigger the following
    @tree.reconstructActiveFormattingElements
  end

  addFormattingElement(name, attributes)
end
|
210
|
+
|
211
|
+
# Start tag for <button>. With no button in scope the element is inserted and
# a Marker is pushed onto the active formatting elements. Otherwise the open
# button is closed (a parse error) and the tag is handed to the parser's
# current phase again.
def startTagButton(name, attributes)
  unless in_scope?('button')
    @tree.reconstructActiveFormattingElements
    @tree.insert_element(name, attributes)
    return @tree.activeFormattingElements.push(Marker)
  end

  parse_error(_('Unexpected start tag (button) implied end tag (button).'))
  processEndTag('button')
  @parser.phase.processStartTag(name, attributes)
end
|
222
|
+
|
223
|
+
# Start tag for <marquee>/<object>: insert the element and push a Marker onto
# the active formatting elements.
def startTagMarqueeObject(name, attributes)
  @tree.reconstructActiveFormattingElements()
  @tree.insert_element(name, attributes)
  formatting = @tree.activeFormattingElements
  formatting.push(Marker)
end
|
228
|
+
|
229
|
+
# Start tag for <xmp>: insert the element and switch the tokenizer's content
# model flag to :CDATA.
def startTagXmp(name, attributes)
  @tree.reconstructActiveFormattingElements()
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :CDATA
end
|
234
|
+
|
235
|
+
# Start tag for <table>: close a <p> in scope, insert the table and move the
# parser to the :inTable phase.
def startTagTable(name, attributes)
  processEndTag('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  table_phase = @parser.phases[:inTable]
  @parser.phase = table_phase
end
|
240
|
+
|
241
|
+
# Start tag for void elements: insert the element, then immediately take the
# innermost entry off the open-element stack again.
def startTagVoidFormatting(name, attributes)
  @tree.reconstructActiveFormattingElements()
  @tree.insert_element(name, attributes)
  stack = @tree.open_elements
  stack.pop
end
|
246
|
+
|
247
|
+
# Start tag for <hr>: close a <p> in scope, insert the element and pop it
# straight off the open-element stack.
def startTagHr(name, attributes)
  endTagP('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  stack = @tree.open_elements
  stack.pop
end
|
252
|
+
|
253
|
+
# <image> is not a real element: report it and reprocess the tag as <img>
# with the same attributes.
def startTagImage(name, attributes)
  # No really...
  parse_error(_('Unexpected start tag (image). Treated as img.'))
  processStartTag('img', attributes)
end
|
258
|
+
|
259
|
+
# Start tag for <input>: insert the element and pop it off the open-element
# stack. Associating it with the current form is still unimplemented.
def startTagInput(name, attributes)
  @tree.reconstructActiveFormattingElements()
  @tree.insert_element(name, attributes)

  if @tree.formPointer
    # XXX Not exactly sure what to do here
    # @tree.open_elements[-1].form = @tree.formPointer
  end

  stack = @tree.open_elements
  stack.pop
end
|
268
|
+
|
269
|
+
# Start tag for the obsolete <isindex> element.
#
# Always a parse error. Unless a form is already open, the tag is expanded
# into the equivalent form / hr / p / label / prompt text / input / hr
# sequence. The <input> receives the isindex attributes plus name="isindex";
# that set is built as a copy so the caller's attribute hash is left
# untouched (the original wrote into it and kept an unused array copy).
def startTagIsindex(name, attributes)
  parse_error(_("Unexpected start tag isindex. Don't use it!"))
  return if @tree.formPointer

  processStartTag('form', {})
  processStartTag('hr', {})
  processStartTag('p', {})
  processStartTag('label', {})
  # XXX Localization ...
  processCharacters('This is a searchable index. Insert your search keywords here: ')
  processStartTag('input', attributes.merge('name' => 'isindex'))
  processEndTag('label')
  processEndTag('p')
  processStartTag('hr', {})
  processEndTag('form')
end
|
286
|
+
|
287
|
+
# Start tag for <textarea>: insert the element, switch the tokenizer's
# content model flag to :RCDATA and install the newline-dropping handler as
# the space-character handler.
#
# The alias is now wrapped in the same warning suppression that the other
# alias sites in this class use.
def startTagTextarea(name, attributes)
  # XXX Form element pointer checking here as well...
  @tree.insert_element(name, attributes)
  @parser.tokenizer.content_model_flag = :RCDATA
  @processSpaceCharactersDropNewline = true

  warnings_on = $-w
  $-w = false if warnings_on
  alias processSpaceCharacters processSpaceCharactersDropNewline
  $-w = true if warnings_on
end
|
294
|
+
|
295
|
+
# iframe, noembed noframes, noscript(if scripting enabled)
|
296
|
+
# Inserts the element and switches the tokenizer's content model flag to
# :CDATA.
def startTagCdata(name, attributes)
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :CDATA
end
|
300
|
+
|
301
|
+
# Start tag for <select>: insert the element and move the parser to the
# :inSelect phase.
def startTagSelect(name, attributes)
  @tree.reconstructActiveFormattingElements()
  @tree.insert_element(name, attributes)
  select_phase = @parser.phases[:inSelect]
  @parser.phase = select_phase
end
|
306
|
+
|
307
|
+
# Start tags that only make sense as children of elements handled in another
# insertion mode ("caption", "col", "colgroup", "frame", "frameset", "head",
# "option", "optgroup", "tbody", "td", "tfoot", "th", "thead", "tr",
# "noscript"). Here they are reported and otherwise ignored.
def startTagMisplaced(name, attributes)
  message = "Unexpected start tag (#{name}). Ignored."
  parse_error(_(message))
end
|
315
|
+
|
316
|
+
# Start tags for the new HTML5 elements ("event-source", "section", "nav",
# "article", "aside", "header", "footer", "datagrid", "command").
#
# Their handling is not settled, so a warning is written to standard error
# and the tag is then treated like any other start tag. The original called
# `sys.stderr.write`, which is Python, not Ruby, and raised NameError as soon
# as one of these tags was seen.
def startTagNew(name, attributes)
  $stderr.puts "Warning: Undefined behaviour for start tag #{name}"
  startTagOther(name, attributes)
  #raise NotImplementedError
end
|
323
|
+
|
324
|
+
# Fallback for any other start tag: rebuild the active formatting elements
# and insert the element.
def startTagOther(name, attributes)
  @tree.reconstructActiveFormattingElements()
  @tree.insert_element(name, attributes)
end
|
328
|
+
|
329
|
+
# End tag for <p>.
#
# With a <p> in scope: generate implied end tags (other than p), complain if
# the current node is not the <p>, and pop elements until no <p> is in scope.
# With none in scope: complain, open an empty <p> and close it again through
# this same method.
def endTagP(name)
  @tree.generateImpliedEndTags('p') if in_scope?('p')

  current = @tree.open_elements.last
  parse_error(_('Unexpected end tag (p).')) unless current.name == 'p'

  unless in_scope?('p')
    startTagCloseP('p', {})
    return endTagP('p')
  end

  @tree.open_elements.pop while in_scope?('p')
end
|
339
|
+
|
340
|
+
# End tag for <body>.
#
# When the second open element is not a body (the inner_html case, including
# a stack holding only the root) the tag is a parse error and is ignored. The
# original dereferenced `open_elements[1]` unconditionally and raised
# NoMethodError on a one-element stack; startTagBody already guards that case.
# Otherwise a parse error is reported if body is not the current node, and
# the parser moves to the :afterBody phase.
def endTagBody(name)
  # XXX Need to take open <p> tags into account here. We shouldn't imply
  # </p> but we should not throw a parse error either. Specification is
  # likely to be updated.
  body = @tree.open_elements[1]
  unless body && body.name == 'body'
    # inner_html case
    parse_error
    return
  end

  unless @tree.open_elements.last.name == 'body'
    parse_error(_("Unexpected end tag (body). Missing end tag (#{@tree.open_elements[-1].name})."))
  end
  @parser.phase = @parser.phases[:afterBody]
end
|
354
|
+
|
355
|
+
# End tag for <html>: act as if </body> had been seen, then (unless parsing
# inner_html) hand the tag to whichever phase is current afterwards.
def endTagHtml(name)
  endTagBody(name)
  return if @parser.inner_html

  @parser.phase.processEndTag(name)
end
|
359
|
+
|
360
|
+
# End tag for block-level elements. Generates implied end tags when the
# element is in scope, reports a parse error when it is not the current node,
# and pops the stack down to and including it when it is in scope.
def endTagBlock(name)
  #Put us back in the right whitespace handling mode
  @processSpaceCharactersDropNewline = false if name == 'pre'

  @tree.generateImpliedEndTags if in_scope?(name)

  current = @tree.open_elements.last
  if current.name != name
    parse_error(_("End tag (#{name}) seen too early. Expected other end tag."))
  end

  remove_open_elements_until(name) if in_scope?(name)
end
|
374
|
+
|
375
|
+
# End tag for <form>. Pops the form when it is the current node, otherwise
# reports the tag as premature. The form pointer is cleared either way.
def endTagForm(name)
  @tree.generateImpliedEndTags if in_scope?(name)

  if @tree.open_elements.last.name == name
    @tree.open_elements.pop
  else
    parse_error(_("End tag (form) seen too early. Ignored."))
  end

  @tree.formPointer = nil
end
|
386
|
+
|
387
|
+
# End tag for <dd>, <dt> and <li>. Works like the block case, except that the
# implied end tags are generated with this element's own name passed along.
def endTagListItem(name)
  # AT Could merge this with the Block case
  @tree.generateImpliedEndTags(name) if in_scope?(name)

  current = @tree.open_elements.last
  if current.name != name
    parse_error(_("End tag (#{name}) seen too early. Expected other end tag."))
  end

  remove_open_elements_until(name) if in_scope?(name)
end
|
397
|
+
|
398
|
+
# End tag for a heading element.
#
# If any heading is in scope, implied end tags are generated. A parse error
# is reported when the current node is not the heading being closed. Then, if
# any heading is (still) in scope, the stack is popped down to and including
# the nearest heading of any level.
#
# The block parameter no longer reuses the name of the enclosing loop
# variable, which shadowed it (and overwrote it on Ruby 1.8).
def endTagHeading(name)
  @tree.generateImpliedEndTags if HEADING_ELEMENTS.any? { |heading| in_scope?(heading) }

  unless @tree.open_elements.last.name == name
    parse_error(_("Unexpected end tag (#{name}). Expected other end tag."))
  end

  # Checked again on purpose: the scope test runs after implied end tags
  # have been generated, exactly as before.
  if HEADING_ELEMENTS.any? { |heading| in_scope?(heading) }
    remove_open_elements_until { |node| HEADING_ELEMENTS.include?(node.name) }
  end
end
|
417
|
+
|
418
|
+
# The much-feared adoption agency algorithm
|
419
|
+
# End tag for a formatting element, handled by the adoption agency algorithm.
#
# Each pass looks up the formatting element for +name+. The method returns
# when that element is missing or out of scope (step 1), or when nothing
# special/scoping sits above it on the stack (step 3: a plain pop). Otherwise
# it re-parents the "furthest block" and the nodes between it and the
# formatting element, swaps in a clone of the formatting element, and loops.
#
# Relative to the original: the unused local `cite` is gone, block parameters
# no longer shadow outer locals, and the special/scoping name list is built
# once per call. Statement order is otherwise unchanged.
def endTagFormatting(name)
  # http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
  # XXX Better parse_error messages appreciated.
  boundary_names = SPECIAL_ELEMENTS + SCOPING_ELEMENTS

  while true
    # Step 1 paragraph 1
    afeElement = @tree.elementInActiveFormattingElements(name)
    if !afeElement or (@tree.open_elements.include?(afeElement) && !in_scope?(afeElement.name))
      parse_error(_("End tag (#{name}) violates step 1, paragraph 1 of the adoption agency algorithm."))
      return
    # Step 1 paragraph 2
    elsif not @tree.open_elements.include?(afeElement)
      parse_error(_("End tag (#{name}) violates step 1, paragraph 2 of the adoption agency algorithm."))
      @tree.activeFormattingElements.delete(afeElement)
      return
    end

    # Step 1 paragraph 3
    if afeElement != @tree.open_elements.last
      parse_error(_("End tag (#{name}) violates step 1, paragraph 3 of the adoption agency algorithm."))
    end

    # Step 2
    # Start of the adoption agency algorithm proper
    afeIndex = @tree.open_elements.index(afeElement)
    furthestBlock = @tree.open_elements[afeIndex..-1].find do |candidate|
      boundary_names.include?(candidate.name)
    end

    # Step 3
    if furthestBlock.nil?
      removed = remove_open_elements_until { |candidate| candidate == afeElement }
      @tree.activeFormattingElements.delete(removed)
      return
    end
    commonAncestor = @tree.open_elements[afeIndex - 1]

    # Step 5
    furthestBlock.parent.removeChild(furthestBlock) if furthestBlock.parent

    # Step 6
    # The bookmark is supposed to help us identify where to reinsert
    # nodes in step 12. We have to ensure that we reinsert nodes after
    # the node before the active formatting element. Note the bookmark
    # can move in step 7.4
    bookmark = @tree.activeFormattingElements.index(afeElement)

    # Step 7
    lastNode = node = furthestBlock
    while true
      # AT replace this with a function and recursion?
      # Node is element before node in open elements
      node = @tree.open_elements[@tree.open_elements.index(node) - 1]
      # Drop stack entries that are not active formatting elements,
      # stepping one entry down the stack each time.
      until @tree.activeFormattingElements.include?(node)
        tmpNode = node
        node = @tree.open_elements[@tree.open_elements.index(node) - 1]
        @tree.open_elements.delete(tmpNode)
      end
      # Step 7.3
      break if node == afeElement
      # Step 7.4
      if lastNode == furthestBlock
        # XXX should this be index(node) or index(node)+1
        # Anne: I think +1 is ok. Given x = [2,3,4,5]
        # x.index(3) gives 1 and then x[1 +1] gives 4...
        bookmark = @tree.activeFormattingElements.index(node) + 1
      end
      # Step 7.5
      if node.hasContent
        clone = node.cloneNode
        # Replace node with clone
        @tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = clone
        @tree.open_elements[@tree.open_elements.index(node)] = clone
        node = clone
      end
      # Step 7.6
      # Remove lastNode from its parents, if any
      lastNode.parent.removeChild(lastNode) if lastNode.parent
      node.appendChild(lastNode)
      # Step 7.7
      lastNode = node
      # End of inner loop
    end

    # Step 8
    lastNode.parent.removeChild(lastNode) if lastNode.parent
    commonAncestor.appendChild(lastNode)

    # Step 9
    clone = afeElement.cloneNode

    # Step 10
    furthestBlock.reparentChildren(clone)

    # Step 11
    furthestBlock.appendChild(clone)

    # Step 12
    @tree.activeFormattingElements.delete(afeElement)
    @tree.activeFormattingElements.insert([bookmark, @tree.activeFormattingElements.length].min, clone)

    # Step 13
    @tree.open_elements.delete(afeElement)
    @tree.open_elements.insert(@tree.open_elements.index(furthestBlock) + 1, clone)
  end
end
|
529
|
+
|
530
|
+
# End tag for <button>, <marquee> and <object>. When the element is in scope
# the stack is popped down to and including it and the active formatting
# elements are cleared. A parse error is reported whenever it is not the
# current node.
def endTagButtonMarqueeObject(name)
  @tree.generateImpliedEndTags if in_scope?(name)

  current = @tree.open_elements.last
  if current.name != name
    parse_error(_("Unexpected end tag (#{name}). Expected other end tag first."))
  end

  return unless in_scope?(name)

  remove_open_elements_until(name)
  @tree.clearActiveFormattingElements
end
|
543
|
+
|
544
|
+
# End tags that belong to other insertion modes: report and ignore.
def endTagMisplaced(name)
  message = "Unexpected end tag (#{name}). Ignored."
  parse_error(_(message))
end
|
548
|
+
|
549
|
+
# </br> is a parse error that is treated as a <br> start tag with no
# attributes: the element is inserted and popped straight away.
def endTagBr(name)
  parse_error(_("Unexpected end tag (br). Treated as br element."))
  @tree.reconstructActiveFormattingElements()
  @tree.insert_element(name, {})
  stack = @tree.open_elements
  stack.pop
end
|
555
|
+
|
556
|
+
# End tags for elements that never take one: report and ignore.
def endTagNone(name)
  message = "This tag (#{name}) has no end tag"
  parse_error(_(message))
end
|
560
|
+
|
561
|
+
# End tag for the CDATA/RCDATA-style elements: pop the element when it is the
# current node, otherwise report the tag and ignore it.
def endTagCdataTextAreaXmp(name)
  stack = @tree.open_elements
  if stack.last.name != name
    parse_error(_("Unexpected end tag (#{name}). Ignored."))
  else
    @tree.open_elements.pop
  end
end
|
568
|
+
|
569
|
+
# End tags for the new HTML5 elements ("event-source", "section", "nav",
# "article", "aside", "header", "footer", "datagrid", "command"). Handling is
# not settled: warn on standard error and fall back to the generic end-tag
# handler.
def endTagNew(name)
  STDERR.puts("Warning: Undefined behaviour for end tag #{name}")
  endTagOther(name)
  #raise NotImplementedError
end
|
576
|
+
|
577
|
+
# Fallback for any other end tag.
#
# Scans the open elements from innermost outwards. On the first element named
# +name+: generate implied end tags, report a parse error if it is not the
# current node, pop down to and including it, and stop. If a special or
# scoping element is met first, the tag is reported and ignored.
def endTagOther(name)
  # XXX This logic should be moved into the treebuilder
  @tree.open_elements.reverse.each do |node|
    if node.name != name
      next unless (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name)

      parse_error(_("Unexpected end tag (#{name}). Ignored."))
      break
    end

    @tree.generateImpliedEndTags

    if @tree.open_elements.last.name != name
      parse_error(_("Unexpected end tag (#{name})."))
    end

    remove_open_elements_until { |element| element == node }
    break
  end
end
|
598
|
+
|
599
|
+
protected
|
600
|
+
|
601
|
+
# Inserts the element and records the resulting current node in the list of
# active formatting elements.
def addFormattingElement(name, attributes)
  @tree.insert_element(name, attributes)
  current = @tree.open_elements.last
  @tree.activeFormattingElements.push(current)
end
|
605
|
+
|
606
|
+
end
|
607
|
+
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'html5/html5parser/phase'
|
2
|
+
|
3
|
+
module HTML5
|
4
|
+
# Tree-construction phase used while a <caption> is open.
class InCaptionPhase < Phase

  # http://www.whatwg.org/specs/web-apps/current-work/#in-caption

  handle_start 'html', %w[caption col colgroup tbody td tfoot th thead tr] => 'TableElement'

  handle_end 'caption', 'table', %w[body col colgroup html tbody td tfoot th thead tr] => 'Ignore'

  # True when a </caption> would be ignored, i.e. no caption is in (table)
  # scope.
  def ignoreEndTagCaption
    !in_scope?('caption', true)
  end

  # Character data is handled by the :inBody phase.
  def processCharacters(data)
    body_phase = @parser.phases[:inBody]
    body_phase.processCharacters(data)
  end

  # A table-structure start tag inside a caption: parse error, act as if
  # </caption> had been seen, then reprocess the tag unless that implied end
  # tag was ignored.
  def startTagTableElement(name, attributes)
    parse_error
    #XXX Have to duplicate logic here to find out if the tag is ignored
    caption_ignored = ignoreEndTagCaption
    @parser.phase.processEndTag('caption')
    return if caption_ignored

    @parser.phase.processStartTag(name, attributes)
  end

  # Any other start tag is handled by the :inBody phase.
  def startTagOther(name, attributes)
    body_phase = @parser.phases[:inBody]
    body_phase.processStartTag(name, attributes)
  end

  # End tag for <caption>: close the caption and return to the :inTable
  # phase, or report a parse error when no caption is in scope.
  def endTagCaption(name)
    if ignoreEndTagCaption
      # inner_html case
      assert @parser.inner_html
      return parse_error
    end

    # AT this code is quite similar to endTagTable in "InTable"
    @tree.generateImpliedEndTags

    if @tree.open_elements[-1].name != 'caption'
      parse_error(_("Unexpected end tag (caption). Missing end tags."))
    end

    remove_open_elements_until('caption')

    @tree.clearActiveFormattingElements
    @parser.phase = @parser.phases[:inTable]
  end

  # </table> inside a caption: parse error, act as if </caption> had been
  # seen, then reprocess the tag unless that implied end tag was ignored.
  def endTagTable(name)
    parse_error
    caption_ignored = ignoreEndTagCaption
    @parser.phase.processEndTag('caption')
    return if caption_ignored

    @parser.phase.processEndTag(name)
  end

  # End tags that cannot close anything here: report and ignore.
  def endTagIgnore(name)
    message = "Unexpected end tag (#{name}). Ignored."
    parse_error(_(message))
  end

  # Any other end tag is handled by the :inBody phase.
  def endTagOther(name)
    body_phase = @parser.phases[:inBody]
    body_phase.processEndTag(name)
  end

end
|
68
|
+
end
|