html5 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. data/History.txt +3 -0
  2. data/Manifest.txt +58 -0
  3. data/README +9 -0
  4. data/Rakefile.rb +17 -0
  5. data/lib/html5/constants.rb +818 -0
  6. data/lib/html5/filters/base.rb +10 -0
  7. data/lib/html5/filters/inject_meta_charset.rb +82 -0
  8. data/lib/html5/filters/optionaltags.rb +198 -0
  9. data/lib/html5/filters/sanitizer.rb +15 -0
  10. data/lib/html5/filters/whitespace.rb +36 -0
  11. data/lib/html5/html5parser/after_body_phase.rb +46 -0
  12. data/lib/html5/html5parser/after_frameset_phase.rb +34 -0
  13. data/lib/html5/html5parser/after_head_phase.rb +50 -0
  14. data/lib/html5/html5parser/before_head_phase.rb +41 -0
  15. data/lib/html5/html5parser/in_body_phase.rb +607 -0
  16. data/lib/html5/html5parser/in_caption_phase.rb +68 -0
  17. data/lib/html5/html5parser/in_cell_phase.rb +78 -0
  18. data/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  19. data/lib/html5/html5parser/in_frameset_phase.rb +57 -0
  20. data/lib/html5/html5parser/in_head_phase.rb +138 -0
  21. data/lib/html5/html5parser/in_row_phase.rb +87 -0
  22. data/lib/html5/html5parser/in_select_phase.rb +84 -0
  23. data/lib/html5/html5parser/in_table_body_phase.rb +83 -0
  24. data/lib/html5/html5parser/in_table_phase.rb +110 -0
  25. data/lib/html5/html5parser/initial_phase.rb +134 -0
  26. data/lib/html5/html5parser/phase.rb +158 -0
  27. data/lib/html5/html5parser/root_element_phase.rb +42 -0
  28. data/lib/html5/html5parser/trailing_end_phase.rb +35 -0
  29. data/lib/html5/html5parser.rb +248 -0
  30. data/lib/html5/inputstream.rb +654 -0
  31. data/lib/html5/liberalxmlparser.rb +158 -0
  32. data/lib/html5/sanitizer.rb +188 -0
  33. data/lib/html5/serializer/htmlserializer.rb +180 -0
  34. data/lib/html5/serializer/xhtmlserializer.rb +20 -0
  35. data/lib/html5/serializer.rb +2 -0
  36. data/lib/html5/tokenizer.rb +968 -0
  37. data/lib/html5/treebuilders/base.rb +334 -0
  38. data/lib/html5/treebuilders/hpricot.rb +231 -0
  39. data/lib/html5/treebuilders/rexml.rb +208 -0
  40. data/lib/html5/treebuilders/simpletree.rb +185 -0
  41. data/lib/html5/treebuilders.rb +24 -0
  42. data/lib/html5/treewalkers/base.rb +154 -0
  43. data/lib/html5/treewalkers/hpricot.rb +48 -0
  44. data/lib/html5/treewalkers/rexml.rb +48 -0
  45. data/lib/html5/treewalkers/simpletree.rb +48 -0
  46. data/lib/html5/treewalkers.rb +26 -0
  47. data/lib/html5.rb +13 -0
  48. data/parse.rb +217 -0
  49. data/tests/preamble.rb +82 -0
  50. data/tests/test_encoding.rb +35 -0
  51. data/tests/test_lxp.rb +263 -0
  52. data/tests/test_parser.rb +68 -0
  53. data/tests/test_sanitizer.rb +142 -0
  54. data/tests/test_serializer.rb +68 -0
  55. data/tests/test_stream.rb +62 -0
  56. data/tests/test_tokenizer.rb +94 -0
  57. data/tests/test_treewalkers.rb +116 -0
  58. data/tests/tokenizer_test_parser.rb +63 -0
  59. metadata +120 -0
@@ -0,0 +1,607 @@
1
+ require 'html5/html5parser/phase'
2
+
3
+ module HTML5
4
+ class InBodyPhase < Phase
5
+
6
+ # http://www.whatwg.org/specs/web-apps/current-work/#in-body
7
+
8
# Tag-to-handler declarations for the "in body" phase.
# NOTE(review): handle_start / handle_end are defined outside this file
# (presumably in Phase). Judging by the methods defined below, a bare tag
# name seems to select startTag<Name> / endTag<Name>, and `tags => 'Suffix'`
# seems to select startTag<Suffix> / endTag<Suffix> -- confirm in phase.rb.
handle_start 'html'
handle_start %w[base link meta script style] => 'ProcessInHead'
handle_start 'title'

handle_start 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'

handle_start 'input', 'textarea', 'select', 'isindex', %w[marquee object]

handle_start %w[li dd dt] => 'ListItem'

handle_start %w[address blockquote center dir div dl fieldset listing menu ol p pre ul] => 'CloseP'

handle_start %w[b big em font i s small strike strong tt u] => 'Formatting'
handle_start 'nobr'

handle_start %w[area basefont bgsound br embed img param spacer wbr] => 'VoidFormatting'

handle_start %w[iframe noembed noframes noscript] => 'Cdata', HEADING_ELEMENTS => 'Heading'

handle_start %w[caption col colgroup frame frameset head option optgroup tbody td tfoot th thead tr] => 'Misplaced'

handle_start %w[event-source section nav article aside header footer datagrid command] => 'New'

handle_end 'p', 'body', 'html', 'form', %w[button marquee object], %w[dd dt li] => 'ListItem'

handle_end %w[address blockquote center div dl fieldset listing menu ol pre ul] => 'Block'

handle_end HEADING_ELEMENTS => 'Heading'

handle_end %w[a b big em font i nobr s small strike strong tt u] => 'Formatting'

handle_end %w[head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th] => 'Misplaced'

handle_end 'br'

handle_end %w[area basefont bgsound embed hr image img input isindex param spacer wbr frame] => 'None'

handle_end %w[noframes noscript noembed textarea xmp iframe] => 'CdataTextAreaXmp'

handle_end %w[event-source section nav article aside header footer datagrid command] => 'New'
48
+
49
# Builds the phase and keeps the plain whitespace handler reachable under
# the name processSpaceCharactersNonPre.
#
# parser, tree - forwarded unchanged to the superclass constructor.
def initialize(parser, tree)
  super(parser, tree)

  # for special handling of whitespace in <pre>
  @processSpaceCharactersDropNewline = false

  # Warnings are switched off around the alias when they are enabled
  # (presumably to silence the method-redefinition warning) and then
  # switched back on.
  warnings_enabled = $-w
  $-w = false if warnings_enabled
  alias processSpaceCharactersNonPre processSpaceCharacters
  $-w = true if warnings_enabled
end
62
+
63
# Whitespace handler that first re-aliases processSpaceCharacters back to the
# regular implementation, then drops one leading newline when the current
# node is an empty <pre> or <textarea>, and finally inserts what is left.
#
# data - the run of space characters to process.
def processSpaceCharactersDropNewline(data)
  # Sometimes (start of <pre> blocks) we want to drop leading newlines
  warnings_enabled = $-w
  $-w = false if warnings_enabled
  alias processSpaceCharacters processSpaceCharactersNonPre
  $-w = true if warnings_enabled

  if data.length > 0 && data[0] == ?\n
    current = @tree.open_elements.last
    if %w[pre textarea].include?(current.name) && !current.hasContent
      data = data[1..-1]
    end
  end

  if data.length > 0
    @tree.reconstructActiveFormattingElements
    @tree.insertText(data)
  end
end
84
+
85
# Regular whitespace handler: reconstructs the active formatting elements
# and inserts the text into the tree.
#
# data - the run of space characters to insert.
def processSpaceCharacters(data)
  @tree.reconstructActiveFormattingElements
  @tree.insertText(data)
end
89
+
90
# Inserts character data after reconstructing the active formatting
# elements.
#
# data - the text to insert.
def processCharacters(data)
  # XXX The specification says to do this for every character at the
  # moment, but apparently that doesn't match the real world so we don't
  # do it for space characters.
  builder = @tree
  builder.reconstructActiveFormattingElements
  builder.insertText(data)
end
97
+
98
# Hands the start tag to the :inHead phase unchanged.
def startTagProcessInHead(name, attributes)
  head_phase = @parser.phases[:inHead]
  head_phase.processStartTag(name, attributes)
end
101
+
102
# Reports a parse error for the misplaced tag, then lets the :inHead phase
# process it.
def startTagTitle(name, attributes)
  message = _("Unexpected start tag (#{name}) that belongs in the head. Moved.")
  parse_error(message)
  @parser.phases[:inHead].processStartTag(name, attributes)
end
106
+
107
# A second <body> start tag is always a parse error. When the second open
# element is a body, attributes it does not have yet are copied onto it;
# otherwise the parser is asserted to be in inner_html mode.
#
# name       - tag name (unused).
# attributes - attribute name/value pairs of the extra <body> tag.
def startTagBody(name, attributes)
  parse_error(_('Unexpected start tag (body).'))

  stack = @tree.open_elements
  if stack.length == 1 || stack[1].name != 'body'
    assert @parser.inner_html
  else
    body = stack[1]
    attributes.each do |attr, value|
      # Existing attributes on the body element win.
      body.attributes[attr] = value unless body.attributes.has_key?(attr)
    end
  end
end
120
+
121
# Closes a <p> that is in scope, inserts the new element, and raises the
# drop-newline flag when the element is <pre>.
def startTagCloseP(name, attributes)
  if in_scope?('p')
    endTagP('p')
  end
  @tree.insert_element(name, attributes)
  @processSpaceCharactersDropNewline = true if name == 'pre'
end
126
+
127
# Inserts a <form> and records it as the tree's form pointer, unless a form
# pointer is already set, in which case the tag is reported and ignored.
def startTagForm(name, attributes)
  if !@tree.formPointer
    endTagP('p') if in_scope?('p')
    @tree.insert_element(name, attributes)
    @tree.formPointer = @tree.open_elements.last
  else
    parse_error(_('Unexpected start tag (form). Ignored.'))
  end
end
136
+
137
# Start tag for li / dd / dt. Walks the open elements from the innermost
# outwards; if a matching list item is found before a blocking element, it
# and everything above it are popped. A new element is then always inserted.
#
# name       - 'li', 'dd' or 'dt'.
# attributes - attributes for the new element.
def startTagListItem(name, attributes)
  endTagP('p') if in_scope?('p')

  stop_names = { 'li' => ['li'], 'dd' => ['dd', 'dt'], 'dt' => ['dd', 'dt'] }[name]

  # Iterate over a reversed copy because the real stack is popped inside.
  @tree.open_elements.reverse.each_with_index do |node, depth|
    if stop_names.include?(node.name)
      popped = Array.new(depth + 1) { @tree.open_elements.pop }
      if depth >= 1
        plural = depth > 1 ? 's' : ''
        popped_names = popped.reverse.map { |item| item.name }.join(', ')
        parse_error(_("Missing end tag%s (%s)" % [plural, popped_names]))
      end
      break
    end

    # Phrasing elements are all non special, non scoping, non
    # formatting elements
    blocking = (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name)
    break if blocking && !%w[address div].include?(node.name)
  end

  # Always insert the new list item element.
  @tree.insert_element(name, attributes)
end
159
+
160
# Closes a <p> in scope, inserts the element and switches the tokenizer's
# content model flag to :PLAINTEXT.
def startTagPlaintext(name, attributes)
  endTagP('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :PLAINTEXT
end
165
+
166
# Closes a <p> in scope and inserts the heading element.
def startTagHeading(name, attributes)
  if in_scope?('p')
    endTagP('p')
  end

  # Uncomment the following for IE7 behavior:
  # HEADING_ELEMENTS.each do |element|
  #   if in_scope?(element)
  #     parse_error(_("Unexpected start tag (#{name})."))
  #
  #     remove_open_elements_until do |element|
  #       HEADING_ELEMENTS.include?(element.name)
  #     end
  #
  #     break
  #   end
  # end
  @tree.insert_element(name, attributes)
end
183
+
184
# Start tag for <a>. If an <a> is already among the active formatting
# elements it is closed first (parse error) and removed from both lists when
# still present; then a new formatting element is added.
def startTagA(name, attributes)
  previous_anchor = @tree.elementInActiveFormattingElements('a')
  if previous_anchor
    parse_error(_('Unexpected start tag (a) implies end tag (a).'))
    endTagFormatting('a')
    if @tree.open_elements.include?(previous_anchor)
      @tree.open_elements.delete(previous_anchor)
    end
    if @tree.activeFormattingElements.include?(previous_anchor)
      @tree.activeFormattingElements.delete(previous_anchor)
    end
  end
  @tree.reconstructActiveFormattingElements
  addFormattingElement(name, attributes)
end
194
+
195
# Reconstructs the active formatting elements, then inserts the element and
# records it as an active formatting element.
def startTagFormatting(name, attributes)
  @tree.reconstructActiveFormattingElements()
  addFormattingElement(name, attributes)
end
199
+
200
# Start tag for <nobr>: a <nobr> already in scope is closed first (parse
# error) before the new formatting element is added.
def startTagNobr(name, attributes)
  @tree.reconstructActiveFormattingElements()
  if in_scope?('nobr')
    parse_error(_('Unexpected start tag (nobr) implies end tag (nobr).'))
    processEndTag('nobr')
    # XXX Need tests that trigger the following
    @tree.reconstructActiveFormattingElements()
  end
  addFormattingElement(name, attributes)
end
210
+
211
# Start tag for <button>. With a button already in scope, that button is
# closed (parse error) and the tag is re-dispatched to the parser's current
# phase; otherwise the element is inserted and a Marker is pushed onto the
# active formatting elements.
def startTagButton(name, attributes)
  if in_scope?('button')
    parse_error(_('Unexpected start tag (button) implied end tag (button).'))
    processEndTag('button')
    @parser.phase.processStartTag(name, attributes)
  else
    builder = @tree
    builder.reconstructActiveFormattingElements
    builder.insert_element(name, attributes)
    builder.activeFormattingElements.push(Marker)
  end
end
222
+
223
# Inserts the element and pushes a Marker onto the active formatting
# elements.
def startTagMarqueeObject(name, attributes)
  builder = @tree
  builder.reconstructActiveFormattingElements
  builder.insert_element(name, attributes)
  builder.activeFormattingElements.push(Marker)
end
228
+
229
# Inserts the element and switches the tokenizer's content model flag to
# :CDATA.
def startTagXmp(name, attributes)
  @tree.reconstructActiveFormattingElements()
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :CDATA
end
234
+
235
# Closes a <p> in scope (through processEndTag), inserts the table and moves
# the parser to the :inTable phase.
def startTagTable(name, attributes)
  if in_scope?('p')
    processEndTag('p')
  end
  @tree.insert_element(name, attributes)
  @parser.phase = @parser.phases[:inTable]
end
240
+
241
# Inserts the element and immediately pops it off the open-element stack
# again, so it never stays open.
def startTagVoidFormatting(name, attributes)
  builder = @tree
  builder.reconstructActiveFormattingElements
  builder.insert_element(name, attributes)
  builder.open_elements.pop
end
246
+
247
# Closes a <p> in scope, inserts the <hr> and pops it straight off the
# open-element stack.
def startTagHr(name, attributes)
  if in_scope?('p')
    endTagP('p')
  end
  @tree.insert_element(name, attributes)
  @tree.open_elements.pop()
end
252
+
253
# <image> is reported as a parse error and re-processed as an <img> start
# tag with the same attributes.
def startTagImage(name, attributes)
  # No really...
  message = _('Unexpected start tag (image). Treated as img.')
  parse_error(message)
  processStartTag('img', attributes)
end
258
+
259
# Inserts the <input> and pops it straight off the open-element stack.
# Associating the element with the current form is not implemented yet.
def startTagInput(name, attributes)
  @tree.reconstructActiveFormattingElements()
  @tree.insert_element(name, attributes)
  if @tree.formPointer
    # XXX Not exactly sure what to do here
    # @tree.open_elements[-1].form = @tree.formPointer
  end
  @tree.open_elements.pop()
end
268
+
269
# Expands <isindex> into the equivalent form markup by replaying synthetic
# tokens: form, hr, p, label, prompt text, input, /label, /p, hr, /form.
# Always a parse error; nothing is emitted when a form pointer is already
# set.
#
# name       - tag name (unused).
# attributes - attributes of the isindex tag; 'name' is overwritten with
#              'isindex' (in place, as before) and the result is passed to
#              the generated <input>.
def startTagIsindex(name, attributes)
  parse_error(_("Unexpected start tag isindex. Don't use it!"))
  return if @tree.formPointer

  processStartTag('form', {})
  processStartTag('hr', {})
  processStartTag('p', {})
  processStartTag('label', {})
  # XXX Localization ...
  processCharacters('This is a searchable index. Insert your search keywords here: ')
  attributes['name'] = 'isindex'
  processStartTag('input', attributes)
  processEndTag('label')
  processEndTag('p')
  processStartTag('hr', {})
  processEndTag('form')
end
286
+
287
# Inserts the <textarea>, switches the tokenizer's content model flag to
# :RCDATA and re-aliases processSpaceCharacters to the newline-dropping
# variant.
def startTagTextarea(name, attributes)
  # XXX Form element pointer checking here as well...
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :RCDATA
  @processSpaceCharactersDropNewline = true
  alias processSpaceCharacters processSpaceCharactersDropNewline
end
294
+
295
+ # iframe, noembed noframes, noscript(if scripting enabled)
296
# Inserts the element and switches the tokenizer's content model flag to
# :CDATA.
def startTagCdata(name, attributes)
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :CDATA
end
300
+
301
# Inserts the <select> and moves the parser to the :inSelect phase.
def startTagSelect(name, attributes)
  @tree.reconstructActiveFormattingElements()
  @tree.insert_element(name, attributes)
  @parser.phase = @parser.phases[:inSelect]
end
306
+
307
# Reports the start tag as a parse error and otherwise ignores it.
def startTagMisplaced(name, attributes)
  # Elements that should be children of other elements that have a
  # different insertion mode; here they are ignored
  # "caption", "col", "colgroup", "frame", "frameset", "head",
  # "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
  # "tr", "noscript"
  message = _("Unexpected start tag (#{name}). Ignored.")
  parse_error(message)
end
315
+
316
# Start tag for the new HTML5 elements ("event-source", "section", "nav",
# "article", "aside", "header", "footer", "datagrid", "command"). Their
# handling is not defined yet, so a warning is written to standard error and
# the tag is treated like any other start tag.
#
# name       - tag name of the element.
# attributes - attributes of the start tag.
def startTagNew(name, attributes)
  # The original called the Python API `sys.stderr.write`, which raises
  # NameError in Ruby before the tag could be handled.
  $stderr.puts "Warning: Undefined behaviour for start tag #{name}"
  startTagOther(name, attributes)
  #raise NotImplementedError
end
323
+
324
# Default start tag handling: reconstruct the active formatting elements and
# insert the element.
def startTagOther(name, attributes)
  @tree.reconstructActiveFormattingElements()
  @tree.insert_element(name, attributes)
end
328
+
329
# End tag for <p>. With a <p> in scope, elements are popped until none is
# left in scope. Without one, an empty <p> is opened first and the end tag
# is processed again. A parse error is reported whenever the current node is
# not a <p>.
def endTagP(name)
  if in_scope?('p')
    @tree.generateImpliedEndTags('p')
  end
  unless @tree.open_elements.last.name == 'p'
    parse_error(_('Unexpected end tag (p).'))
  end
  if in_scope?('p')
    while in_scope?('p')
      @tree.open_elements.pop
    end
  else
    startTagCloseP('p', {})
    endTagP('p')
  end
end
339
+
340
# End tag for <body>: moves the parser to the :afterBody phase.
#
# If the second open element is missing or is not a body (the inner_html
# case), a parse error is reported and the tag is ignored. If the current
# node is not the body, the tag is still honoured but a parse error names
# the element left open.
#
# name - tag name (unused).
def endTagBody(name)
  # XXX Need to take open <p> tags into account here. We shouldn't imply
  # </p> but we should not throw a parse error either. Specification is
  # likely to be updated.
  body = @tree.open_elements[1]
  # Guard against a stack holding only the root element; the original
  # called .name on nil in that situation.
  unless body && body.name == 'body'
    # inner_html case
    parse_error
    return
  end
  unless @tree.open_elements.last.name == 'body'
    parse_error(_("Unexpected end tag (body). Missing end tag (#{@tree.open_elements[-1].name})."))
  end
  @parser.phase = @parser.phases[:afterBody]
end
354
+
355
# End tag for <html>: handled as a </body> first; unless the parser is in
# inner_html mode the tag is then re-dispatched to the parser's current
# phase.
def endTagHtml(name)
  endTagBody(name)
  unless @parser.inner_html
    @parser.phase.processEndTag(name)
  end
end
359
+
360
# End tag for block-level elements. When the element is in scope, implied
# end tags are generated and the stack is popped up to and including it; a
# parse error is reported whenever it is not the current node.
def endTagBlock(name)
  #Put us back in the right whitespace handling mode
  if name == 'pre'
    @processSpaceCharactersDropNewline = false
  end

  @tree.generateImpliedEndTags() if in_scope?(name)

  if @tree.open_elements.last.name != name
    parse_error(_("End tag (#{name}) seen too early. Expected other end tag."))
  end

  remove_open_elements_until(name) if in_scope?(name)
end
374
+
375
# End tag for <form>: pops the form when it is the current node, otherwise
# reports a parse error. The tree's form pointer is cleared either way.
def endTagForm(name)
  @tree.generateImpliedEndTags() if in_scope?(name)

  if @tree.open_elements.last.name == name
    @tree.open_elements.pop
  else
    parse_error(_("End tag (form) seen too early. Ignored."))
  end
  @tree.formPointer = nil
end
386
+
387
# End tag for dd / dt / li. Same shape as endTagBlock, except that the
# element's own name is passed to generateImpliedEndTags.
def endTagListItem(name)
  # AT Could merge this with the Block case
  if in_scope?(name)
    @tree.generateImpliedEndTags(name)
  end

  if @tree.open_elements.last.name != name
    parse_error(_("End tag (#{name}) seen too early. Expected other end tag."))
  end

  if in_scope?(name)
    remove_open_elements_until(name)
  end
end
397
+
398
# End tag for a heading. If any heading element is in scope, implied end
# tags are generated and, after the mismatch check, the stack is popped up
# to and including the nearest heading of any level.
def endTagHeading(name)
  HEADING_ELEMENTS.each do |heading|
    next unless in_scope?(heading)
    @tree.generateImpliedEndTags
    break
  end

  if @tree.open_elements.last.name != name
    parse_error(_("Unexpected end tag (#{name}). Expected other end tag."))
  end

  HEADING_ELEMENTS.each do |heading|
    next unless in_scope?(heading)
    remove_open_elements_until { |node| HEADING_ELEMENTS.include?(node.name) }
    break
  end
end
417
+
418
+ # The much-feared adoption agency algorithm
419
# Adoption agency algorithm, run for the end tag of a formatting element
# (and called directly by startTagA).
#
# name - tag name of the formatting element being closed.
#
# The outer loop repeats the whole algorithm until one of the early returns
# fires: no usable active formatting element (step 1), or no furthest block
# (step 3).
def endTagFormatting(name)
  # http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
  # XXX Better parse_error messages appreciated.
  while true
    # Step 1 paragraph 1
    afeElement = @tree.elementInActiveFormattingElements(name)
    if !afeElement || (@tree.open_elements.include?(afeElement) && !in_scope?(afeElement.name))
      parse_error(_("End tag (#{name}) violates step 1, paragraph 1 of the adoption agency algorithm."))
      return
    # Step 1 paragraph 2
    elsif !@tree.open_elements.include?(afeElement)
      parse_error(_("End tag (#{name}) violates step 1, paragraph 2 of the adoption agency algorithm."))
      @tree.activeFormattingElements.delete(afeElement)
      return
    end

    # Step 1 paragraph 3
    if afeElement != @tree.open_elements.last
      parse_error(_("End tag (#{name}) violates step 1, paragraph 3 of the adoption agency algorithm."))
    end

    # Step 2
    # Start of the adoption agency algorithm proper: the furthest block is
    # the first special/scoping element at or after the formatting element.
    afeIndex = @tree.open_elements.index(afeElement)
    blockNames = SPECIAL_ELEMENTS + SCOPING_ELEMENTS
    furthestBlock = @tree.open_elements[afeIndex..-1].find { |candidate| blockNames.include?(candidate.name) }

    # Step 3
    if furthestBlock.nil?
      popped = remove_open_elements_until { |candidate| candidate == afeElement }
      @tree.activeFormattingElements.delete(popped)
      return
    end
    commonAncestor = @tree.open_elements[afeIndex - 1]

    # Step 5
    furthestBlock.parent.removeChild(furthestBlock) if furthestBlock.parent

    # Step 6
    # The bookmark is supposed to help us identify where to reinsert
    # nodes in step 12. We have to ensure that we reinsert nodes after
    # the node before the active formatting element. Note the bookmark
    # can move in step 7.4
    bookmark = @tree.activeFormattingElements.index(afeElement)

    # Step 7
    lastNode = node = furthestBlock
    while true
      # AT replace this with a function and recursion?
      # Node is element before node in open elements
      node = @tree.open_elements[@tree.open_elements.index(node) - 1]
      until @tree.activeFormattingElements.include?(node)
        tmpNode = node
        node = @tree.open_elements[@tree.open_elements.index(node) - 1]
        @tree.open_elements.delete(tmpNode)
      end
      # Step 7.3
      break if node == afeElement
      # Step 7.4
      if lastNode == furthestBlock
        # XXX should this be index(node) or index(node)+1
        # Anne: I think +1 is ok. Given x = [2,3,4,5]
        # x.index(3) gives 1 and then x[1 +1] gives 4...
        bookmark = @tree.activeFormattingElements.index(node) + 1
      end
      # Step 7.5
      if node.hasContent
        clone = node.cloneNode
        # Replace node with clone
        @tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = clone
        @tree.open_elements[@tree.open_elements.index(node)] = clone
        node = clone
      end
      # Step 7.6
      # Remove lastNode from its parents, if any
      lastNode.parent.removeChild(lastNode) if lastNode.parent
      node.appendChild(lastNode)
      # Step 7.7
      lastNode = node
      # End of inner loop
    end

    # Step 8
    lastNode.parent.removeChild(lastNode) if lastNode.parent
    commonAncestor.appendChild(lastNode)

    # Step 9
    clone = afeElement.cloneNode

    # Step 10
    furthestBlock.reparentChildren(clone)

    # Step 11
    furthestBlock.appendChild(clone)

    # Step 12
    @tree.activeFormattingElements.delete(afeElement)
    @tree.activeFormattingElements.insert([bookmark, @tree.activeFormattingElements.length].min, clone)

    # Step 13
    @tree.open_elements.delete(afeElement)
    @tree.open_elements.insert(@tree.open_elements.index(furthestBlock) + 1, clone)
  end
end
529
+
530
# End tag for button / marquee / object. When the element is in scope the
# stack is popped up to and including it and the active formatting elements
# are cleared; a parse error is reported whenever it is not the current
# node.
def endTagButtonMarqueeObject(name)
  if in_scope?(name)
    @tree.generateImpliedEndTags
  end

  if @tree.open_elements.last.name != name
    parse_error(_("Unexpected end tag (#{name}). Expected other end tag first."))
  end

  if in_scope?(name)
    remove_open_elements_until(name)

    @tree.clearActiveFormattingElements()
  end
end
543
+
544
# Reports the end tag as a parse error and otherwise ignores it.
def endTagMisplaced(name)
  # This handles elements with end tags in other insertion modes.
  message = _("Unexpected end tag (#{name}). Ignored.")
  parse_error(message)
end
548
+
549
# </br> is a parse error and is treated as a <br> element: the element is
# inserted with no attributes and immediately popped.
def endTagBr(name)
  parse_error(_("Unexpected end tag (br). Treated as br element."))
  builder = @tree
  builder.reconstructActiveFormattingElements
  builder.insert_element(name, {})
  builder.open_elements.pop
end
555
+
556
# Reports a parse error; the tree is left untouched.
def endTagNone(name)
  # This handles elements with no end tag.
  message = _("This tag (#{name}) has no end tag")
  parse_error(message)
end
560
+
561
# Pops the current node when it matches the end tag; otherwise the tag is
# reported and ignored.
def endTagCdataTextAreaXmp(name)
  current = @tree.open_elements.last
  if current.name == name
    @tree.open_elements.pop
  else
    parse_error(_("Unexpected end tag (#{name}). Ignored."))
  end
end
568
+
569
# End tag for the new HTML5 elements: prints a warning on STDERR and falls
# back to endTagOther.
def endTagNew(name)
  # New HTML5 elements, "event-source", "section", "nav",
  # "article", "aside", "header", "footer", "datagrid", "command"
  STDERR.puts("Warning: Undefined behaviour for end tag #{name}")
  endTagOther(name)
  #raise NotImplementedError
end
576
+
577
# Default end tag handling. Walks the open elements from the innermost
# outwards: on the first node with a matching name the stack is popped up to
# and including that node; hitting a special or scoping element first means
# the end tag is reported and ignored.
def endTagOther(name)
  # XXX This logic should be moved into the treebuilder
  @tree.open_elements.reverse.each do |node|
    if node.name == name
      @tree.generateImpliedEndTags

      if @tree.open_elements.last.name != name
        parse_error(_("Unexpected end tag (#{name})."))
      end

      remove_open_elements_until { |candidate| candidate == node }

      break
    elsif (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name)
      parse_error(_("Unexpected end tag (#{name}). Ignored."))
      break
    end
  end
end
598
+
599
+ protected
600
+
601
# Inserts the element and records the resulting current node as an active
# formatting element.
def addFormattingElement(name, attributes)
  @tree.insert_element(name, attributes)
  inserted = @tree.open_elements.last
  @tree.activeFormattingElements.push(inserted)
end
605
+
606
+ end
607
+ end
@@ -0,0 +1,68 @@
1
+ require 'html5/html5parser/phase'
2
+
3
+ module HTML5
4
+ class InCaptionPhase < Phase
5
+
6
+ # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
7
+
8
# Tag-to-handler declarations for the "in caption" phase.
# NOTE(review): handle_start / handle_end are defined outside this file;
# the 'TableElement' and 'Ignore' suffixes appear to select
# startTagTableElement and endTagIgnore below -- confirm in phase.rb.
handle_start 'html', %w[caption col colgroup tbody td tfoot th thead tr] => 'TableElement'

handle_end 'caption', 'table', %w[body col colgroup html tbody td tfoot th thead tr] => 'Ignore'
11
+
12
# True when in_scope?('caption', true) reports no caption in scope, i.e.
# when a </caption> would be ignored.
def ignoreEndTagCaption
  caption_in_scope = in_scope?('caption', true)
  !caption_in_scope
end
15
+
16
# Character data is handled by the :inBody phase.
def processCharacters(data)
  body_phase = @parser.phases[:inBody]
  body_phase.processCharacters(data)
end
19
+
20
# A table-structure start tag inside a caption is a parse error that implies
# </caption>. The tag is re-dispatched to the parser's current phase unless
# that implied end tag was ignored.
def startTagTableElement(name, attributes)
  parse_error
  #XXX Have to duplicate logic here to find out if the tag is ignored
  end_tag_ignored = ignoreEndTagCaption
  @parser.phase.processEndTag('caption')
  unless end_tag_ignored
    @parser.phase.processStartTag(name, attributes)
  end
end
27
+
28
# Any other start tag is handled by the :inBody phase.
def startTagOther(name, attributes)
  body_phase = @parser.phases[:inBody]
  body_phase.processStartTag(name, attributes)
end
31
+
32
# End tag for <caption>. When it must be ignored (asserted to be the
# inner_html case) only a parse error is reported. Otherwise the stack is
# popped up to and including the caption, the active formatting elements are
# cleared and the parser returns to the :inTable phase.
def endTagCaption(name)
  if ignoreEndTagCaption
    # inner_html case
    assert @parser.inner_html
    parse_error
  else
    # AT this code is quite similar to endTagTable in "InTable"
    @tree.generateImpliedEndTags()

    if @tree.open_elements.last.name != 'caption'
      parse_error(_("Unexpected end tag (caption). Missing end tags."))
    end

    remove_open_elements_until('caption')

    @tree.clearActiveFormattingElements()
    @parser.phase = @parser.phases[:inTable]
  end
end
51
+
52
# </table> inside a caption is a parse error that implies </caption>. The
# end tag is re-dispatched to the parser's current phase unless the implied
# </caption> was ignored.
def endTagTable(name)
  parse_error
  end_tag_ignored = ignoreEndTagCaption
  @parser.phase.processEndTag('caption')
  unless end_tag_ignored
    @parser.phase.processEndTag(name)
  end
end
58
+
59
# Reports the end tag as a parse error and otherwise ignores it.
def endTagIgnore(name)
  message = _("Unexpected end tag (#{name}). Ignored.")
  parse_error(message)
end
62
+
63
# Any other end tag is handled by the :inBody phase.
def endTagOther(name)
  body_phase = @parser.phases[:inBody]
  body_phase.processEndTag(name)
end
66
+
67
+ end
68
+ end