html5 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. data/History.txt +3 -0
  2. data/Manifest.txt +58 -0
  3. data/README +9 -0
  4. data/Rakefile.rb +17 -0
  5. data/lib/html5/constants.rb +818 -0
  6. data/lib/html5/filters/base.rb +10 -0
  7. data/lib/html5/filters/inject_meta_charset.rb +82 -0
  8. data/lib/html5/filters/optionaltags.rb +198 -0
  9. data/lib/html5/filters/sanitizer.rb +15 -0
  10. data/lib/html5/filters/whitespace.rb +36 -0
  11. data/lib/html5/html5parser/after_body_phase.rb +46 -0
  12. data/lib/html5/html5parser/after_frameset_phase.rb +34 -0
  13. data/lib/html5/html5parser/after_head_phase.rb +50 -0
  14. data/lib/html5/html5parser/before_head_phase.rb +41 -0
  15. data/lib/html5/html5parser/in_body_phase.rb +607 -0
  16. data/lib/html5/html5parser/in_caption_phase.rb +68 -0
  17. data/lib/html5/html5parser/in_cell_phase.rb +78 -0
  18. data/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  19. data/lib/html5/html5parser/in_frameset_phase.rb +57 -0
  20. data/lib/html5/html5parser/in_head_phase.rb +138 -0
  21. data/lib/html5/html5parser/in_row_phase.rb +87 -0
  22. data/lib/html5/html5parser/in_select_phase.rb +84 -0
  23. data/lib/html5/html5parser/in_table_body_phase.rb +83 -0
  24. data/lib/html5/html5parser/in_table_phase.rb +110 -0
  25. data/lib/html5/html5parser/initial_phase.rb +134 -0
  26. data/lib/html5/html5parser/phase.rb +158 -0
  27. data/lib/html5/html5parser/root_element_phase.rb +42 -0
  28. data/lib/html5/html5parser/trailing_end_phase.rb +35 -0
  29. data/lib/html5/html5parser.rb +248 -0
  30. data/lib/html5/inputstream.rb +654 -0
  31. data/lib/html5/liberalxmlparser.rb +158 -0
  32. data/lib/html5/sanitizer.rb +188 -0
  33. data/lib/html5/serializer/htmlserializer.rb +180 -0
  34. data/lib/html5/serializer/xhtmlserializer.rb +20 -0
  35. data/lib/html5/serializer.rb +2 -0
  36. data/lib/html5/tokenizer.rb +968 -0
  37. data/lib/html5/treebuilders/base.rb +334 -0
  38. data/lib/html5/treebuilders/hpricot.rb +231 -0
  39. data/lib/html5/treebuilders/rexml.rb +208 -0
  40. data/lib/html5/treebuilders/simpletree.rb +185 -0
  41. data/lib/html5/treebuilders.rb +24 -0
  42. data/lib/html5/treewalkers/base.rb +154 -0
  43. data/lib/html5/treewalkers/hpricot.rb +48 -0
  44. data/lib/html5/treewalkers/rexml.rb +48 -0
  45. data/lib/html5/treewalkers/simpletree.rb +48 -0
  46. data/lib/html5/treewalkers.rb +26 -0
  47. data/lib/html5.rb +13 -0
  48. data/parse.rb +217 -0
  49. data/tests/preamble.rb +82 -0
  50. data/tests/test_encoding.rb +35 -0
  51. data/tests/test_lxp.rb +263 -0
  52. data/tests/test_parser.rb +68 -0
  53. data/tests/test_sanitizer.rb +142 -0
  54. data/tests/test_serializer.rb +68 -0
  55. data/tests/test_stream.rb +62 -0
  56. data/tests/test_tokenizer.rb +94 -0
  57. data/tests/test_treewalkers.rb +116 -0
  58. data/tests/tokenizer_test_parser.rb +63 -0
  59. metadata +120 -0
@@ -0,0 +1,607 @@
1
+ require 'html5/html5parser/phase'
2
+
3
+ module HTML5
4
+ class InBodyPhase < Phase
5
+
6
+ # http://www.whatwg.org/specs/web-apps/current-work/#in-body
7
+
8
# Tag routing for the "in body" insertion mode.  A bare name is handed to
# handle_start / handle_end on its own; a `names => 'Suffix'` pair hands a
# handler suffix over together with the names (compare the
# startTag<Suffix> / endTag<Suffix> methods defined in this class).
# NOTE(review): handle_start and handle_end are defined outside this file
# (presumably in Phase) -- confirm there how bare names and bare arrays are
# resolved to handler methods.
handle_start('html')
handle_start(%w[base link meta script style] => 'ProcessInHead')
handle_start('title')

handle_start('body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image')

handle_start('input', 'textarea', 'select', 'isindex', %w[marquee object])

handle_start(%w[li dd dt] => 'ListItem')

handle_start(%w[address blockquote center dir div dl fieldset listing menu ol p pre ul] => 'CloseP')

handle_start(%w[b big em font i s small strike strong tt u] => 'Formatting')
handle_start('nobr')

handle_start(%w[area basefont bgsound br embed img param spacer wbr] => 'VoidFormatting')

handle_start(%w[iframe noembed noframes noscript] => 'Cdata', HEADING_ELEMENTS => 'Heading')

handle_start(%w[caption col colgroup frame frameset head option optgroup tbody td tfoot th thead tr] => 'Misplaced')

handle_start(%w[event-source section nav article aside header footer datagrid command] => 'New')

handle_end('p', 'body', 'html', 'form', %w[button marquee object], %w[dd dt li] => 'ListItem')

handle_end(%w[address blockquote center div dl fieldset listing menu ol pre ul] => 'Block')

handle_end(HEADING_ELEMENTS => 'Heading')

handle_end(%w[a b big em font i nobr s small strike strong tt u] => 'Formatting')

handle_end(%w[head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th] => 'Misplaced')

handle_end('br')

handle_end(%w[area basefont bgsound embed hr image img input isindex param spacer wbr frame] => 'None')

handle_end(%w[noframes noscript noembed textarea xmp iframe] => 'CdataTextAreaXmp')

handle_end(%w[event-source section nav article aside header footer datagrid command] => 'New')
48
+
49
# Sets up the phase and keeps the ordinary whitespace handler reachable
# under a second name.
#
# parser, tree:: forwarded unchanged to the superclass constructor.
def initialize(parser, tree)
  super(parser, tree)

  # for special handling of whitespace in <pre>
  @processSpaceCharactersDropNewline = false

  # Remember the regular handler as processSpaceCharactersNonPre so it can
  # be put back after processSpaceCharacters has been swapped out.  When
  # running with -w, warnings are switched off around the alias and then
  # switched back on.
  warnings_on = $-w
  $-w = false if warnings_on
  alias processSpaceCharactersNonPre processSpaceCharacters
  $-w = true if warnings_on
end
62
+
63
# One-shot whitespace handler: sometimes (start of <pre> blocks) we want to
# drop a leading newline.
#
# It first re-points processSpaceCharacters at the regular handler, then
# strips one leading "\n" from +data+ when the current node is a content-less
# <pre> or <textarea>, and inserts whatever text is left.
def processSpaceCharactersDropNewline(data)
  # Restore the normal handler (warnings silenced around the alias under -w).
  warnings_on = $-w
  $-w = false if warnings_on
  alias processSpaceCharacters processSpaceCharactersNonPre
  $-w = true if warnings_on

  if data.length > 0 && data[0] == ?\n
    innermost = @tree.open_elements.last
    if %w[pre textarea].include?(innermost.name) && !innermost.hasContent
      data = data[1..-1]
    end
  end

  # Nothing left to insert once the newline is gone.
  return unless data.length > 0

  @tree.reconstructActiveFormattingElements
  @tree.insertText(data)
end
84
+
85
# Regular whitespace handler: reconstructs the active formatting elements
# and then inserts +data+ as text.
def processSpaceCharacters(data)
  builder = @tree
  builder.reconstructActiveFormattingElements
  builder.insertText(data)
end
89
+
90
# Handles non-space character data: reconstructs the active formatting
# elements and inserts +data+ as text.
def processCharacters(data)
  # XXX The specification says to do this for every character at the
  # moment, but apparently that doesn't match the real world so we don't
  # do it for space characters.
  builder = @tree
  builder.reconstructActiveFormattingElements
  builder.insertText(data)
end
97
+
98
# Forwards the start tag to the parser's :inHead phase.
def startTagProcessInHead(name, attributes)
  head_phase = @parser.phases[:inHead]
  head_phase.processStartTag(name, attributes)
end
101
+
102
# Reports a parse error for the misplaced tag, then forwards it to the
# parser's :inHead phase.
def startTagTitle(name, attributes)
  message = _("Unexpected start tag (#{name}) that belongs in the head. Moved.")
  parse_error(message)
  head_phase = @parser.phases[:inHead]
  head_phase.processStartTag(name, attributes)
end
106
+
107
# A second <body> start tag is always a parse error.  When the second open
# element is the body, attributes it does not have yet are copied onto it;
# otherwise the method only asserts that the parser is in inner_html mode.
def startTagBody(name, attributes)
  parse_error(_('Unexpected start tag (body).'))

  stack = @tree.open_elements
  if stack.length != 1 && stack[1].name == 'body'
    attributes.each do |key, value|
      existing = @tree.open_elements[1].attributes
      existing[key] = value unless existing.has_key?(key)
    end
  else
    assert @parser.inner_html
  end
end
120
+
121
# Opens a block-level element, first closing any <p> that is in scope.
#
# For <pre> the drop-newline whitespace handler is installed as well, the
# same way startTagTextarea does it.  Previously only the
# @processSpaceCharactersDropNewline flag was set here; nothing in this
# class reads that flag, so a newline directly after <pre> was never
# dropped.  processSpaceCharactersDropNewline re-installs the regular
# handler itself the first time it runs.
def startTagCloseP(name, attributes)
  endTagP('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  return unless name == 'pre'

  @processSpaceCharactersDropNewline = true
  # Silence the redefinition warning under -w, as the other aliases in this
  # class do.
  warnings_on = $-w
  $-w = false if warnings_on
  alias processSpaceCharacters processSpaceCharactersDropNewline
  $-w = true if warnings_on
end
126
+
127
# Opens a <form> unless the tree already has a form pointer, in which case
# the tag is reported and ignored.  The new element becomes the form
# pointer.
def startTagForm(name, attributes)
  return parse_error(_('Unexpected start tag (form). Ignored.')) if @tree.formPointer

  endTagP('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  @tree.formPointer = @tree.open_elements.last
end
136
+
137
# Opens an <li>, <dd> or <dt>.  Walking the open elements from the
# innermost outwards, an already-open item of the same family is popped
# together with everything above it (reported as missing end tags when more
# than that one element is popped).  The walk stops at the first special
# or scoping element other than <address> and <div>.
def startTagListItem(name, attributes)
  endTagP('p') if in_scope?('p')

  closers =
    case name
    when 'li' then ['li']
    when 'dd', 'dt' then ['dd', 'dt']
    end

  @tree.open_elements.reverse.each_with_index do |node, depth|
    if closers.include?(node.name)
      popped = Array.new(depth + 1) { @tree.open_elements.pop }
      if depth >= 1
        plural = depth > 1 ? 's' : ''
        names = popped.reverse.map { |item| item.name }.join(', ')
        parse_error(_("Missing end tag%s (%s)" % [plural, names]))
      end
      break
    end

    # Phrasing elements are all non special, non scoping, non
    # formatting elements
    is_barrier = (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name)
    break if is_barrier && !%w[address div].include?(node.name)
  end

  # Always insert an <li> element.
  @tree.insert_element(name, attributes)
end
159
+
160
# Opens the element (closing any <p> in scope first) and switches the
# tokenizer's content model flag to :PLAINTEXT.
def startTagPlaintext(name, attributes)
  endTagP('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :PLAINTEXT
end
165
+
166
# Opens a heading element, first closing any <p> that is in scope.
def startTagHeading(name, attributes)
  paragraph_open = in_scope?('p')
  endTagP('p') if paragraph_open

  # Uncomment the following for IE7 behavior:
  # HEADING_ELEMENTS.each do |element|
  #   if in_scope?(element)
  #     parse_error(_("Unexpected start tag (#{name})."))
  #
  #     remove_open_elements_until do |element|
  #       HEADING_ELEMENTS.include?(element.name)
  #     end
  #
  #     break
  #   end
  # end
  @tree.insert_element(name, attributes)
end
183
+
184
# Opens an <a>.  If an <a> is already among the active formatting elements
# it is closed first (a parse error) and then removed from both the open
# elements and the active formatting elements, should it still be there.
def startTagA(name, attributes)
  stale_anchor = @tree.elementInActiveFormattingElements('a')
  if stale_anchor
    parse_error(_('Unexpected start tag (a) implies end tag (a).'))
    endTagFormatting('a')
    if @tree.open_elements.include?(stale_anchor)
      @tree.open_elements.delete(stale_anchor)
    end
    if @tree.activeFormattingElements.include?(stale_anchor)
      @tree.activeFormattingElements.delete(stale_anchor)
    end
  end

  @tree.reconstructActiveFormattingElements
  addFormattingElement(name, attributes)
end
194
+
195
# Opens a formatting element: reconstructs the active formatting elements,
# then inserts and registers the new element via addFormattingElement.
def startTagFormatting(name, attributes)
  @tree.reconstructActiveFormattingElements()
  addFormattingElement(name, attributes)
end
199
+
200
# Opens a <nobr>.  A <nobr> already in scope is closed first, which is a
# parse error.
def startTagNobr(name, attributes)
  builder = @tree
  builder.reconstructActiveFormattingElements

  if in_scope?('nobr')
    parse_error(_('Unexpected start tag (nobr) implies end tag (nobr).'))
    processEndTag('nobr')
    # XXX Need tests that trigger the following
    builder.reconstructActiveFormattingElements
  end

  addFormattingElement(name, attributes)
end
210
+
211
# Opens a <button>.  With a <button> already in scope the old one is closed
# (a parse error) and the start tag is handed to the parser's current phase
# again; otherwise the element is inserted and a Marker is pushed onto the
# active formatting elements.
def startTagButton(name, attributes)
  unless in_scope?('button')
    @tree.reconstructActiveFormattingElements
    @tree.insert_element(name, attributes)
    return @tree.activeFormattingElements.push(Marker)
  end

  parse_error(_('Unexpected start tag (button) implied end tag (button).'))
  processEndTag('button')
  @parser.phase.processStartTag(name, attributes)
end
222
+
223
# Inserts the element and pushes a Marker onto the active formatting
# elements.
def startTagMarqueeObject(name, attributes)
  builder = @tree
  builder.reconstructActiveFormattingElements
  builder.insert_element(name, attributes)
  builder.activeFormattingElements.push(Marker)
end
228
+
229
# Inserts the element and switches the tokenizer's content model flag to
# :CDATA.
def startTagXmp(name, attributes)
  builder = @tree
  builder.reconstructActiveFormattingElements
  builder.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :CDATA
end
234
+
235
# Opens a <table> (closing any <p> in scope through processEndTag first)
# and switches the parser to its :inTable phase.
def startTagTable(name, attributes)
  processEndTag('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  table_phase = @parser.phases[:inTable]
  @parser.phase = table_phase
end
240
+
241
# Inserts the element and pops it off the open elements again straight
# away.
def startTagVoidFormatting(name, attributes)
  builder = @tree
  builder.reconstructActiveFormattingElements
  builder.insert_element(name, attributes)
  builder.open_elements.pop
end
246
+
247
# Inserts an <hr> (closing any <p> in scope first) and pops it off the open
# elements again straight away.
def startTagHr(name, attributes)
  endTagP('p') if in_scope?('p')
  builder = @tree
  builder.insert_element(name, attributes)
  builder.open_elements.pop
end
252
+
253
# <image> is reported as a parse error and reprocessed as an <img> start
# tag with the same attributes.
def startTagImage(name, attributes)
  # No really...
  message = _('Unexpected start tag (image). Treated as img.')
  parse_error(message)
  processStartTag('img', attributes)
end
258
+
259
# Inserts an <input> and pops it off the open elements again straight away.
# Nothing is done with the form pointer yet (see the XXX note).
def startTagInput(name, attributes)
  builder = @tree
  builder.reconstructActiveFormattingElements
  builder.insert_element(name, attributes)
  if builder.formPointer
    # XXX Not exactly sure what to do here
    # builder.open_elements[-1].form = builder.formPointer
  end
  builder.open_elements.pop
end
268
+
269
# Expands the obsolete <isindex> tag into the equivalent
# form / hr / p / label / input / hr structure by feeding synthetic tokens
# through this phase.  Always a parse error; ignored entirely when the tree
# already has a form pointer.
#
# name::       the tag name (unused beyond dispatch).
# attributes:: attribute hash of the <isindex> tag.  It is copied, not
#              modified: the generated <input> gets these attributes plus
#              'name' => 'isindex'.
def startTagIsindex(name, attributes)
  parse_error(_("Unexpected start tag isindex. Don't use it!"))
  return if @tree.formPointer

  processStartTag('form', {})
  processStartTag('hr', {})
  processStartTag('p', {})
  processStartTag('label', {})
  # XXX Localization ...
  processCharacters('This is a searchable index. Insert your search keywords here: ')
  # merge builds a new hash, so the caller's attributes stay untouched.
  processStartTag('input', attributes.merge('name' => 'isindex'))
  processEndTag('label')
  processEndTag('p')
  processStartTag('hr', {})
  processEndTag('form')
end
286
+
287
# Inserts a <textarea>, switches the tokenizer's content model flag to
# :RCDATA and installs the drop-newline whitespace handler.
def startTagTextarea(name, attributes)
  # XXX Form element pointer checking here as well...
  @tree.insert_element(name, attributes)

  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :RCDATA

  @processSpaceCharactersDropNewline = true
  alias processSpaceCharacters processSpaceCharactersDropNewline
end
294
+
295
+ # iframe, noembed noframes, noscript(if scripting enabled)
296
# Inserts the element and switches the tokenizer's content model flag to
# :CDATA.
def startTagCdata(name, attributes)
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :CDATA
end
300
+
301
# Inserts a <select> and switches the parser to its :inSelect phase.
def startTagSelect(name, attributes)
  builder = @tree
  builder.reconstructActiveFormattingElements
  builder.insert_element(name, attributes)
  select_phase = @parser.phases[:inSelect]
  @parser.phase = select_phase
end
306
+
307
# Start tags that belong to a different insertion mode are reported and
# otherwise ignored.
def startTagMisplaced(name, attributes)
  # Elements that should be children of other elements that have a
  # different insertion mode; here they are ignored
  # "caption", "col", "colgroup", "frame", "frameset", "head",
  # "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
  # "tr", "noscript"
  message = _("Unexpected start tag (#{name}). Ignored.")
  parse_error(message)
end
315
+
316
# Handles the new HTML5 elements "event-source", "section", "nav",
# "article", "aside", "header", "footer", "datagrid" and "command".
# Their behaviour is not defined here yet, so a warning is written to
# standard error and the tag is handled like any other start tag.
#
# The warning used to go through `sys.stderr.write`, a leftover from the
# Python original: `sys` does not exist in Ruby, so every such tag raised
# NameError.
def startTagNew(name, attributes)
  $stderr.puts "Warning: Undefined behaviour for start tag #{name}"
  startTagOther(name, attributes)
  #raise NotImplementedError
end
323
+
324
# Default start-tag handling: reconstruct the active formatting elements
# and insert the element.
def startTagOther(name, attributes)
  builder = @tree
  builder.reconstructActiveFormattingElements
  builder.insert_element(name, attributes)
end
328
+
329
# Closes a <p>.  With a <p> in scope, implied end tags are generated and
# elements are popped until no <p> is in scope any more.  Without one, a
# <p> is opened through startTagCloseP and this method runs again to close
# it.  A parse error is reported whenever the current node is not a <p>.
def endTagP(name)
  @tree.generateImpliedEndTags('p') if in_scope?('p')

  current = @tree.open_elements.last
  parse_error(_('Unexpected end tag (p).')) unless current.name == 'p'

  unless in_scope?('p')
    startTagCloseP('p', {})
    return endTagP('p')
  end

  @tree.open_elements.pop while in_scope?('p')
end
339
+
340
# Handles </body>: switches the parser to its :afterBody phase.
#
# When the second open element is not the body (the inner_html case) the
# tag is reported and ignored.  The stack length is checked first: the
# original indexed open_elements[1] unconditionally and raised
# NoMethodError on nil when only the root element was open, a case that
# startTagBody already guards against with its length check.
#
# A parse error naming the current node is also reported when that node is
# not the body itself.
def endTagBody(name)
  # XXX Need to take open <p> tags into account here. We shouldn't imply
  # </p> but we should not throw a parse error either. Specification is
  # likely to be updated.
  stack = @tree.open_elements
  if stack.length < 2 || stack[1].name != 'body'
    # inner_html case
    parse_error
    return
  end

  unless stack.last.name == 'body'
    parse_error(_("Unexpected end tag (body). Missing end tag (#{stack.last.name})."))
  end
  @parser.phase = @parser.phases[:afterBody]
end
354
+
355
# Handles </html> by treating it as </body> first; outside inner_html mode
# the end tag is then handed to whatever phase the parser is in afterwards.
def endTagHtml(name)
  endTagBody(name)
  return if @parser.inner_html
  @parser.phase.processEndTag(name)
end
359
+
360
# Closes a block-level element.  With the element in scope, implied end
# tags are generated and the stack is unwound through
# remove_open_elements_until(name); a parse error is reported whenever the
# current node is not the named element.
def endTagBlock(name)
  # Put us back in the right whitespace handling mode
  @processSpaceCharactersDropNewline = false if name == 'pre'

  @tree.generateImpliedEndTags if in_scope?(name)

  current = @tree.open_elements.last
  if current.name != name
    parse_error(_("End tag (#{name}) seen too early. Expected other end tag."))
  end

  remove_open_elements_until(name) if in_scope?(name)
end
374
+
375
# Closes a <form>.  The current node is popped only when it is the form
# itself, otherwise the tag is reported; the form pointer is cleared either
# way.
def endTagForm(name)
  @tree.generateImpliedEndTags if in_scope?(name)

  if @tree.open_elements.last.name == name
    @tree.open_elements.pop
  else
    parse_error(_("End tag (form) seen too early. Ignored."))
  end

  @tree.formPointer = nil
end
386
+
387
# Closes an <li>, <dd> or <dt>.  With the element in scope, implied end
# tags (except for the element itself) are generated and the stack is
# unwound through remove_open_elements_until(name).
def endTagListItem(name)
  # AT Could merge this with the Block case
  @tree.generateImpliedEndTags(name) if in_scope?(name)

  current = @tree.open_elements.last
  if current.name != name
    parse_error(_("End tag (#{name}) seen too early. " + 'Expected other end tag.'))
  end

  remove_open_elements_until(name) if in_scope?(name)
end
397
+
398
# Closes a heading.  When any heading element is in scope, implied end tags
# are generated and the stack is unwound up to and including the nearest
# heading element of any level.  A parse error is reported whenever the
# current node is not the named heading.
def endTagHeading(name)
  @tree.generateImpliedEndTags if HEADING_ELEMENTS.any? { |heading| in_scope?(heading) }

  if @tree.open_elements.last.name != name
    parse_error(_("Unexpected end tag (#{name}). Expected other end tag."))
  end

  if HEADING_ELEMENTS.any? { |heading| in_scope?(heading) }
    remove_open_elements_until { |candidate| HEADING_ELEMENTS.include?(candidate.name) }
  end
end
417
+
418
+ # The much-feared adoption agency algorithm
419
# End-tag handler for formatting elements: the adoption agency algorithm.
# Each pass either returns (steps 1 and 3) or rearranges the tree around
# the "furthest block" and then runs again.
def endTagFormatting(name)
  # http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
  # XXX Better parse_error messages appreciated.
  while true
    # Step 1 paragraph 1
    formatting = @tree.elementInActiveFormattingElements(name)
    if !formatting || (@tree.open_elements.include?(formatting) && !in_scope?(formatting.name))
      parse_error(_("End tag (#{name}) violates step 1, paragraph 1 of the adoption agency algorithm."))
      return
    end

    # Step 1 paragraph 2
    unless @tree.open_elements.include?(formatting)
      parse_error(_("End tag (#{name}) violates step 1, paragraph 2 of the adoption agency algorithm."))
      @tree.activeFormattingElements.delete(formatting)
      return
    end

    # Step 1 paragraph 3
    if formatting != @tree.open_elements.last
      parse_error(_("End tag (#{name}) violates step 1, paragraph 3 of the adoption agency algorithm."))
    end

    # Step 2
    # Start of the adoption agency algorithm proper: the furthest block is
    # the first special or scoping element from the formatting element
    # onwards in the open elements.
    formatting_index = @tree.open_elements.index(formatting)
    furthest = @tree.open_elements[formatting_index..-1].find do |candidate|
      (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(candidate.name)
    end

    # Step 3
    if furthest.nil?
      removed = remove_open_elements_until { |candidate| candidate == formatting }
      @tree.activeFormattingElements.delete(removed)
      return
    end
    common_ancestor = @tree.open_elements[formatting_index - 1]

    # Step 5
    furthest.parent.removeChild(furthest) if furthest.parent

    # Step 6
    # The bookmark is supposed to help us identify where to reinsert
    # nodes in step 12. We have to ensure that we reinsert nodes after
    # the node before the active formatting element. Note the bookmark
    # can move in step 7.4
    bookmark = @tree.activeFormattingElements.index(formatting)

    # Step 7
    last_node = node = furthest
    while true
      # AT replace this with a function and recursion?
      # Node is element before node in open elements
      node = @tree.open_elements[@tree.open_elements.index(node) - 1]
      until @tree.activeFormattingElements.include?(node)
        discarded = node
        node = @tree.open_elements[@tree.open_elements.index(node) - 1]
        @tree.open_elements.delete(discarded)
      end

      # Step 7.3
      break if node == formatting

      # Step 7.4
      if last_node == furthest
        # XXX should this be index(node) or index(node)+1
        # Anne: I think +1 is ok. Given x = [2,3,4,5]
        # x.index(3) gives 1 and then x[1 +1] gives 4...
        bookmark = @tree.activeFormattingElements.index(node) + 1
      end

      # Step 7.5
      if node.hasContent
        replacement = node.cloneNode
        # Replace node with clone
        @tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = replacement
        @tree.open_elements[@tree.open_elements.index(node)] = replacement
        node = replacement
      end

      # Step 7.6
      # Remove last_node from its parents, if any
      last_node.parent.removeChild(last_node) if last_node.parent
      node.appendChild(last_node)

      # Step 7.7
      last_node = node
      # End of inner loop
    end

    # Step 8
    last_node.parent.removeChild(last_node) if last_node.parent
    common_ancestor.appendChild(last_node)

    # Step 9
    formatting_clone = formatting.cloneNode

    # Step 10
    furthest.reparentChildren(formatting_clone)

    # Step 11
    furthest.appendChild(formatting_clone)

    # Step 12
    @tree.activeFormattingElements.delete(formatting)
    insert_at = [bookmark, @tree.activeFormattingElements.length].min
    @tree.activeFormattingElements.insert(insert_at, formatting_clone)

    # Step 13
    @tree.open_elements.delete(formatting)
    @tree.open_elements.insert(@tree.open_elements.index(furthest) + 1, formatting_clone)
  end
end
529
+
530
# Closes a <button>, <marquee> or <object>.  With the element in scope the
# stack is unwound through remove_open_elements_until(name) and
# @tree.clearActiveFormattingElements is called.
def endTagButtonMarqueeObject(name)
  @tree.generateImpliedEndTags if in_scope?(name)

  current = @tree.open_elements.last
  if current.name != name
    parse_error(_("Unexpected end tag (#{name}). Expected other end tag first."))
  end

  return unless in_scope?(name)

  remove_open_elements_until(name)
  @tree.clearActiveFormattingElements
end
543
+
544
# End tags that are handled in other insertion modes are reported and
# ignored here.
def endTagMisplaced(name)
  message = _("Unexpected end tag (#{name}). Ignored.")
  parse_error(message)
end
548
+
549
# </br> is a parse error and is treated as a <br> element: the element is
# inserted without attributes and popped again straight away.
def endTagBr(name)
  parse_error(_("Unexpected end tag (br). Treated as br element."))
  builder = @tree
  builder.reconstructActiveFormattingElements
  builder.insert_element(name, {})
  builder.open_elements.pop
end
555
+
556
# End tags for elements that have no end tag are reported and ignored.
def endTagNone(name)
  message = _("This tag (#{name}) has no end tag")
  parse_error(message)
end
560
+
561
# Pops the current node when it is the named element; any other end tag is
# reported and ignored.
def endTagCdataTextAreaXmp(name)
  stack = @tree.open_elements
  return stack.pop if stack.last.name == name

  parse_error(_("Unexpected end tag (#{name}). Ignored."))
end
568
+
569
# End tags of the new HTML5 elements ("event-source", "section", "nav",
# "article", "aside", "header", "footer", "datagrid", "command"): a warning
# is printed to STDERR and the tag is handled like any other end tag.
def endTagNew(name)
  STDERR.puts("Warning: Undefined behaviour for end tag #{name}")
  endTagOther(name)
  #raise NotImplementedError
end
576
+
577
# Default end-tag handling.  The open elements are searched from the
# innermost outwards for the first node that either carries the tag's name
# or is a special/scoping element.  A name match closes that element
# (reporting a parse error if it is not the current node after implied end
# tags); a special/scoping element makes the tag a reported no-op.
def endTagOther(name)
  # XXX This logic should be moved into the treebuilder
  hit = @tree.open_elements.reverse.find do |node|
    node.name == name || (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name)
  end
  return unless hit

  if hit.name == name
    @tree.generateImpliedEndTags

    if @tree.open_elements.last.name != name
      parse_error(_("Unexpected end tag (#{name})."))
    end

    remove_open_elements_until { |candidate| candidate == hit }
  else
    parse_error(_("Unexpected end tag (#{name}). Ignored."))
  end
end
598
+
599
+ protected
600
+
601
# Inserts the element and records the resulting current node in the list
# of active formatting elements.
def addFormattingElement(name, attributes)
  @tree.insert_element(name, attributes)
  current = @tree.open_elements.last
  @tree.activeFormattingElements.push(current)
end
605
+
606
+ end
607
+ end
@@ -0,0 +1,68 @@
1
+ require 'html5/html5parser/phase'
2
+
3
+ module HTML5
4
+ class InCaptionPhase < Phase
5
+
6
+ # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
7
+
8
# Tag routing for the "in caption" insertion mode; a `names => 'Suffix'`
# pair hands a handler suffix over together with the names (compare
# startTagTableElement and endTagIgnore in this class).
# NOTE(review): handle_start and handle_end are defined outside this file
# -- confirm there how bare names are resolved.
handle_start('html', %w[caption col colgroup tbody td tfoot th thead tr] => 'TableElement')

handle_end('caption', 'table', %w[body col colgroup html tbody td tfoot th thead tr] => 'Ignore')
11
+
12
# True when no <caption> is in scope (queried with in_scope?'s second
# argument set to true), i.e. when a </caption> would be ignored.
def ignoreEndTagCaption
  caption_open = in_scope?('caption', true)
  !caption_open
end
15
+
16
# Character data inside a caption is handled by the parser's :inBody phase.
def processCharacters(data)
  body_phase = @parser.phases[:inBody]
  body_phase.processCharacters(data)
end
19
+
20
# A table-structure start tag inside a caption is a parse error that
# implies </caption>.  The start tag is then reprocessed by the parser's
# current phase, unless the implied </caption> was one that gets ignored.
def startTagTableElement(name, attributes)
  parse_error
  # XXX Have to duplicate logic here to find out if the tag is ignored
  caption_ignored = ignoreEndTagCaption
  @parser.phase.processEndTag('caption')
  return if caption_ignored
  @parser.phase.processStartTag(name, attributes)
end
27
+
28
# Any other start tag is handled by the parser's :inBody phase.
def startTagOther(name, attributes)
  body_phase = @parser.phases[:inBody]
  body_phase.processStartTag(name, attributes)
end
31
+
32
# Handles </caption>.  When the tag is to be ignored (inner_html case) it
# is only reported.  Otherwise implied end tags are generated, the stack
# is unwound through remove_open_elements_until('caption'),
# @tree.clearActiveFormattingElements is called and the parser goes back
# to its :inTable phase.
def endTagCaption(name)
  if ignoreEndTagCaption
    # inner_html case
    assert @parser.inner_html
    return parse_error
  end

  # AT this code is quite similar to endTagTable in "InTable"
  @tree.generateImpliedEndTags

  if @tree.open_elements.last.name != 'caption'
    parse_error(_("Unexpected end tag (caption). Missing end tags."))
  end

  remove_open_elements_until('caption')

  @tree.clearActiveFormattingElements
  @parser.phase = @parser.phases[:inTable]
end
51
+
52
# </table> inside a caption is a parse error that implies </caption>.  The
# end tag is then reprocessed by the parser's current phase, unless the
# implied </caption> was one that gets ignored.
def endTagTable(name)
  parse_error
  caption_ignored = ignoreEndTagCaption
  @parser.phase.processEndTag('caption')
  return if caption_ignored
  @parser.phase.processEndTag(name)
end
58
+
59
# Reports the end tag as unexpected; nothing else is done with it.
def endTagIgnore(name)
  message = _("Unexpected end tag (#{name}). Ignored.")
  parse_error(message)
end
62
+
63
# Any other end tag is handled by the parser's :inBody phase.
def endTagOther(name)
  body_phase = @parser.phases[:inBody]
  body_phase.processEndTag(name)
end
66
+
67
+ end
68
+ end