spk-html5 0.10.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74) hide show
  1. data/History.txt +10 -0
  2. data/Manifest.txt +73 -0
  3. data/README +45 -0
  4. data/Rakefile.rb +33 -0
  5. data/bin/html5 +7 -0
  6. data/lib/html5.rb +13 -0
  7. data/lib/html5/cli.rb +248 -0
  8. data/lib/html5/constants.rb +1061 -0
  9. data/lib/html5/filters/base.rb +10 -0
  10. data/lib/html5/filters/inject_meta_charset.rb +82 -0
  11. data/lib/html5/filters/iso639codes.rb +755 -0
  12. data/lib/html5/filters/optionaltags.rb +198 -0
  13. data/lib/html5/filters/rfc2046.rb +31 -0
  14. data/lib/html5/filters/rfc3987.rb +91 -0
  15. data/lib/html5/filters/sanitizer.rb +15 -0
  16. data/lib/html5/filters/validator.rb +834 -0
  17. data/lib/html5/filters/whitespace.rb +36 -0
  18. data/lib/html5/html5parser.rb +247 -0
  19. data/lib/html5/html5parser/after_after_body_phase.rb +43 -0
  20. data/lib/html5/html5parser/after_after_frameset_phase.rb +32 -0
  21. data/lib/html5/html5parser/after_body_phase.rb +46 -0
  22. data/lib/html5/html5parser/after_frameset_phase.rb +33 -0
  23. data/lib/html5/html5parser/after_head_phase.rb +55 -0
  24. data/lib/html5/html5parser/before_head_phase.rb +44 -0
  25. data/lib/html5/html5parser/before_html_phase.rb +41 -0
  26. data/lib/html5/html5parser/in_body_phase.rb +636 -0
  27. data/lib/html5/html5parser/in_caption_phase.rb +69 -0
  28. data/lib/html5/html5parser/in_cell_phase.rb +78 -0
  29. data/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  30. data/lib/html5/html5parser/in_foreign_content_phase.rb +50 -0
  31. data/lib/html5/html5parser/in_frameset_phase.rb +56 -0
  32. data/lib/html5/html5parser/in_head_phase.rb +143 -0
  33. data/lib/html5/html5parser/in_row_phase.rb +96 -0
  34. data/lib/html5/html5parser/in_select_phase.rb +90 -0
  35. data/lib/html5/html5parser/in_select_table_phase.rb +35 -0
  36. data/lib/html5/html5parser/in_table_body_phase.rb +92 -0
  37. data/lib/html5/html5parser/in_table_phase.rb +177 -0
  38. data/lib/html5/html5parser/initial_phase.rb +133 -0
  39. data/lib/html5/html5parser/phase.rb +171 -0
  40. data/lib/html5/inputstream.rb +735 -0
  41. data/lib/html5/liberalxmlparser.rb +158 -0
  42. data/lib/html5/sanitizer.rb +209 -0
  43. data/lib/html5/serializer.rb +2 -0
  44. data/lib/html5/serializer/htmlserializer.rb +179 -0
  45. data/lib/html5/serializer/xhtmlserializer.rb +20 -0
  46. data/lib/html5/sniffer.rb +45 -0
  47. data/lib/html5/tokenizer.rb +1059 -0
  48. data/lib/html5/treebuilders.rb +24 -0
  49. data/lib/html5/treebuilders/base.rb +339 -0
  50. data/lib/html5/treebuilders/hpricot.rb +231 -0
  51. data/lib/html5/treebuilders/rexml.rb +215 -0
  52. data/lib/html5/treebuilders/simpletree.rb +191 -0
  53. data/lib/html5/treewalkers.rb +26 -0
  54. data/lib/html5/treewalkers/base.rb +162 -0
  55. data/lib/html5/treewalkers/hpricot.rb +48 -0
  56. data/lib/html5/treewalkers/rexml.rb +48 -0
  57. data/lib/html5/treewalkers/simpletree.rb +48 -0
  58. data/lib/html5/version.rb +3 -0
  59. data/test/preamble.rb +69 -0
  60. data/test/test_cli.rb +16 -0
  61. data/test/test_encoding.rb +35 -0
  62. data/test/test_input_stream.rb +26 -0
  63. data/test/test_lxp.rb +283 -0
  64. data/test/test_parser.rb +63 -0
  65. data/test/test_sanitizer.rb +173 -0
  66. data/test/test_serializer.rb +67 -0
  67. data/test/test_sniffer.rb +27 -0
  68. data/test/test_stream.rb +71 -0
  69. data/test/test_tokenizer.rb +95 -0
  70. data/test/test_treewalkers.rb +135 -0
  71. data/test/test_validator.rb +31 -0
  72. data/test/tokenizer_test_parser.rb +67 -0
  73. data/test19.rb +38 -0
  74. metadata +198 -0
@@ -0,0 +1,44 @@
1
+ require 'html5/html5parser/phase'
2
+
3
+ module HTML5
4
+ class BeforeHeadPhase < Phase
5
+
6
+ handle_start 'html', 'head'
7
+
8
+ handle_end %w( head br ) => 'ImplyHead'
9
+
10
# End of input before any <head> was seen: synthesize an empty <head>
# element, then pass EOF on to whatever phase the parser is in afterwards.
def process_eof
  startTagHead('head', {})
  current_phase = @parser.phase
  current_phase.process_eof
end
14
+
15
# Whitespace-only character data is dropped in this phase: deliberate no-op.
def processSpaceCharacters(data)
  nil
end
17
+
18
# Non-whitespace text before <head>: imply an empty <head> element and
# re-dispatch the text to the parser's current phase.
def processCharacters(data)
  startTagHead('head', {})
  current_phase = @parser.phase
  current_phase.processCharacters(data)
end
22
+
23
# Inserts the <head> element, records it as the tree's head pointer and
# switches the parser to the :inHead phase.
def startTagHead(name, attributes)
  @tree.insert_element(name, attributes)
  head_element = @tree.open_elements.last
  @tree.head_pointer = head_element
  @parser.phase = @parser.phases[:inHead]
end
28
+
29
# Any other start tag implies an empty <head>; the tag itself is then
# reprocessed by the parser's current phase.
def startTagOther(name, attributes)
  startTagHead('head', {})
  current_phase = @parser.phase
  current_phase.processStartTag(name, attributes)
end
33
+
34
# End tags registered with 'ImplyHead' (head, br) imply an empty <head>
# and are then reprocessed by the parser's current phase.
def endTagImplyHead(name)
  startTagHead('head', {})
  current_phase = @parser.phase
  current_phase.processEndTag(name)
end
38
+
39
# Any other end tag in this phase is only reported as a parse error.
def endTagOther(name)
  details = {"name" => name}
  parse_error("end-tag-after-implied-root", details)
end
42
+
43
+ end
44
+ end
@@ -0,0 +1,41 @@
1
+ require 'html5/html5parser/phase'
2
+
3
+ module HTML5
4
+ class BeforeHtmlPhase < Phase
5
+
6
# End of input before any <html> element: create the root element, then
# pass EOF on to the parser's current phase.
def process_eof
  insert_html_element
  current_phase = @parser.phase
  current_phase.process_eof
end
10
+
11
# Comments seen before <html> are inserted with the document node passed
# as the second argument to insert_comment.
def processComment(data)
  document = @tree.document
  @tree.insert_comment(data, document)
end
14
+
15
# Whitespace-only character data is dropped in this phase: deliberate no-op.
def processSpaceCharacters(data)
  nil
end
17
+
18
# Text before <html>: create the root element first, then re-dispatch the
# text to the parser's current phase.
def processCharacters(data)
  insert_html_element
  current_phase = @parser.phase
  current_phase.processCharacters(data)
end
22
+
23
# Any start tag before <html>: create the root element, then reprocess the
# tag in the parser's current phase. An explicit <html> tag additionally
# sets the parser's first_start_tag flag.
#
# self_closing is accepted but is not forwarded to the next phase.
def processStartTag(name, attributes, self_closing=false)
  if name == 'html'
    @parser.first_start_tag = true
  end
  insert_html_element
  current_phase = @parser.phase
  current_phase.processStartTag(name, attributes)
end
28
+
29
# Any end tag before <html>: create the root element, then reprocess the
# end tag in the parser's current phase.
def processEndTag(name)
  insert_html_element
  current_phase = @parser.phase
  current_phase.processEndTag(name)
end
33
+
34
# Creates an attribute-less <html> element, pushes it on the stack of open
# elements, appends it to the document and moves the parser to :beforeHead.
def insert_html_element
  root = @tree.createElement('html', {})
  @tree.open_elements << root
  @tree.document.appendChild(root)
  @parser.phase = @parser.phases[:beforeHead]
end
40
+ end
41
+ end
@@ -0,0 +1,636 @@
1
+ require 'html5/html5parser/phase'
2
+
3
+ module HTML5
4
+ class InBodyPhase < Phase
5
+
6
+ # http://www.whatwg.org/specs/web-apps/current-work/#in-body
7
+
8
+ handle_start 'html'
9
+ handle_start %w(base link meta script style title) => 'ProcessInHead'
10
+
11
+ handle_start 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'
12
+
13
+ handle_start 'input', 'textarea', 'select', 'isindex', %w(applet marquee object)
14
+
15
+ handle_start %w(li dd dt) => 'ListItem'
16
+
17
+ handle_start %w(address blockquote center dir div dl fieldset listing menu ol p pre ul) => 'CloseP'
18
+
19
+ handle_start %w(b big em font i s small strike strong tt u) => 'Formatting'
20
+ handle_start 'nobr'
21
+
22
+ handle_start %w(area basefont bgsound br embed img param spacer wbr) => 'VoidFormatting'
23
+
24
+ handle_start %w(iframe noembed noframes noscript) => 'Cdata', HEADING_ELEMENTS => 'Heading'
25
+
26
+ handle_start %w(caption col colgroup frame frameset head tbody td tfoot th thead tr) => 'Misplaced'
27
+
28
+ handle_start %w(option optgroup)
29
+
30
+ handle_start %w(event-source section nav article aside header footer datagrid command) => 'New'
31
+
32
+ handle_start %w[math] => 'ForeignContent'
33
+
34
+ handle_end 'p', 'body', 'html', 'form', %w(applet button marquee object), %w(dd dt li) => 'ListItem'
35
+
36
+ handle_end %w(address blockquote center div dl fieldset listing menu ol pre ul) => 'Block'
37
+
38
+ handle_end HEADING_ELEMENTS => 'Heading'
39
+
40
+ handle_end %w(a b big em font i nobr s small strike strong tt u) => 'Formatting'
41
+
42
+ handle_end %w(head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th) => 'Misplaced'
43
+
44
+ handle_end 'br'
45
+
46
+ handle_end %w(area basefont bgsound embed hr image img input isindex param spacer wbr frame) => 'None'
47
+
48
+ handle_end %w(noframes noscript noembed textarea xmp iframe ) => 'CdataTextAreaXmp'
49
+
50
+ handle_end %w(event-source section nav article aside header footer datagrid command) => 'New'
51
+
52
# Builds the phase and keeps the stock whitespace handler reachable under a
# second name (processSpaceCharactersNonPre) on this instance, so it can be
# put back after a newline-dropping handler has been swapped in.
def initialize(parser, tree)
  super(parser, tree)

  # for special handling of whitespace in <pre>
  class << self
    alias_method :processSpaceCharactersNonPre, :processSpaceCharacters
  end
end
60
+
61
# One-shot whitespace handler used right after <pre>, <listing> or
# <textarea> opens: restores the regular handler on this instance, drops a
# single leading newline when the current node is one of those elements and
# has no content yet, then inserts whatever text is left.
def processSpaceCharactersDropNewline(data)
  class << self
    begin
      remove_method :processSpaceCharacters
    rescue StandardError
      nil # nothing to remove on this instance; keep going (best effort)
    end
    alias processSpaceCharacters processSpaceCharactersNonPre
  end

  text = data
  if !text.empty? && text[0, 1] == "\n"
    current = @tree.open_elements.last
    if %w[listing pre textarea].include?(current.name) && !current.hasContent
      text = text[1..-1]
    end
  end

  return if text.empty?

  @tree.reconstructActiveFormattingElements
  @tree.insertText(text)
end
79
+
80
# Regular whitespace handling in body: reconstruct the active formatting
# elements, then insert the whitespace as text.
def processSpaceCharacters(data)
  @tree.reconstructActiveFormattingElements
  @tree.insertText(data)
end
84
+
85
# Character data in body: reconstruct the active formatting elements, then
# insert the text.
#
# XXX The specification says to do this for every character at the moment,
# but apparently that doesn't match the real world so we don't do it for
# space characters.
def processCharacters(data)
  @tree.reconstructActiveFormattingElements()
  @tree.insertText(data)
end
92
+
93
# base, link, meta, script, style and title are delegated to the :inHead
# phase object without changing the parser's current phase.
def startTagProcessInHead(name, attributes)
  head_phase = @parser.phases[:inHead]
  head_phase.processStartTag(name, attributes)
end
96
+
97
# A second <body> start tag is always a parse error. When the second entry
# on the stack of open elements is the body element, attributes it does not
# already carry are copied onto it; otherwise the method only asserts that
# the parser is in the inner_html case.
def startTagBody(name, attributes)
  parse_error("unexpected-start-tag", {"name" => "body"})

  stack = @tree.open_elements
  if stack.length != 1 && stack[1].name == 'body'
    body = stack[1]
    attributes.each do |attr, value|
      body.attributes[attr] = value unless body.attributes.has_key?(attr)
    end
  else
    assert @parser.inner_html
  end
end
110
+
111
# Block-level start tags: close an open <p> that is in scope, insert the
# element, and for <pre>/<listing> swap in the newline-dropping whitespace
# handler on this instance.
def startTagCloseP(name, attributes)
  endTagP('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  return unless %w[pre listing].include?(name)

  class << self
    begin
      remove_method :processSpaceCharacters
    rescue StandardError
      nil # nothing to remove on this instance; keep going (best effort)
    end
    alias processSpaceCharacters processSpaceCharactersDropNewline
  end
end
121
+
122
# <form> start tag: a parse error while the tree already has a form
# pointer; otherwise close an in-scope <p>, insert the element and make it
# the form pointer.
def startTagForm(name, attributes)
  return parse_error("unexpected-start-tag", {"name" => name}) if @tree.formPointer

  endTagP('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  @tree.formPointer = @tree.open_elements.last
end
131
+
132
# Start tag for <li>, <dd> or <dt>.
#
# Closes an in-scope <p>, then walks the stack of open elements from the
# top looking for a list item that the new one implicitly closes (li closes
# li; dd/dt close dd or dt). When one is found, it and everything above it
# are popped, and a parse error names the elements above it that were left
# without end tags. The walk stops at any special or scoping element other
# than address and div. The new element is always inserted.
def startTagListItem(name, attributes)
  endTagP('p') if in_scope?('p')
  stop_names = {'li' => ['li'], 'dd' => ['dd', 'dt'], 'dt' => ['dd', 'dt']}[name]
  barrier_names = SPECIAL_ELEMENTS + SCOPING_ELEMENTS

  # Iterate over a reversed copy so popping from the live stack is safe.
  @tree.open_elements.reverse.each_with_index do |node, depth|
    if stop_names.include?(node.name)
      popped = (0..depth).collect { @tree.open_elements.pop }
      if depth >= 1
        # The last popped node is the list item being closed implicitly;
        # only the nodes that sat above it are missing their end tags.
        unclosed = popped[0...-1]
        parse_error(
          depth == 1 ? "missing-end-tag" : "missing-end-tags",
          {"name" => unclosed.collect { |n| n.name }.join(", ")})
      end
      break
    end

    # Phrasing elements are all non special, non scoping, non
    # formatting elements
    break if barrier_names.include?(node.name) && !%w[address div].include?(node.name)
  end

  # Always insert the new list item element.
  @tree.insert_element(name, attributes)
end
157
+
158
# <plaintext>: close an in-scope <p>, insert the element and set the
# tokenizer's content model flag to :PLAINTEXT.
def startTagPlaintext(name, attributes)
  endTagP('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :PLAINTEXT
end
163
+
164
# Heading start tag: close an in-scope <p>, then insert the heading.
#
# An already-open heading is deliberately left alone. For IE7 behavior one
# would instead, for the first HEADING_ELEMENTS entry that is in scope,
# report "unexpected-start-tag" and remove open elements until a heading
# element has been removed.
def startTagHeading(name, attributes)
  if in_scope?('p')
    endTagP('p')
  end
  @tree.insert_element(name, attributes)
end
181
+
182
# <a> start tag. If an <a> is already among the active formatting elements
# it is closed first (reported as a parse error) and removed from both the
# stack of open elements and the active formatting list, should the end-tag
# processing have left it there. The new <a> is then added as a formatting
# element.
def startTagA(name, attributes)
  open_anchor = @tree.elementInActiveFormattingElements('a')
  if open_anchor
    parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "a", "endName" => "a"})
    endTagFormatting('a')
    # delete is a no-op when the element is no longer present
    @tree.open_elements.delete(open_anchor)
    @tree.activeFormattingElements.delete(open_anchor)
  end
  @tree.reconstructActiveFormattingElements
  addFormattingElement(name, attributes)
end
192
+
193
# Formatting start tags (b, big, em, ...): reconstruct the active
# formatting elements and add the new one.
def startTagFormatting(name, attributes)
  @tree.reconstructActiveFormattingElements()
  addFormattingElement(name, attributes)
end
197
+
198
# <nobr> start tag: an in-scope <nobr> is closed first (parse error) and
# the active formatting elements are reconstructed again before the new
# element is added.
def startTagNobr(name, attributes)
  @tree.reconstructActiveFormattingElements
  if in_scope?('nobr')
    details = {"startName" => "nobr", "endName" => "nobr"}
    parse_error("unexpected-start-tag-implies-end-tag", details)
    processEndTag('nobr')
    # XXX Need tests that trigger the following
    @tree.reconstructActiveFormattingElements
  end
  addFormattingElement(name, attributes)
end
208
+
209
# <button> start tag. With a <button> already in scope, that one is closed
# (parse error) and the start tag is reprocessed by the parser's current
# phase. Otherwise the element is inserted and Marker is pushed onto the
# active formatting elements.
def startTagButton(name, attributes)
  unless in_scope?('button')
    @tree.reconstructActiveFormattingElements
    @tree.insert_element(name, attributes)
    return @tree.activeFormattingElements.push(Marker)
  end

  parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "button", "endName" => "button"})
  processEndTag('button')
  @parser.phase.processStartTag(name, attributes)
end
220
+
221
# <applet>, <marquee>, <object>: reconstruct the active formatting
# elements, insert the element and push Marker onto the active formatting
# list.
def startTagAppletMarqueeObject(name, attributes)
  @tree.reconstructActiveFormattingElements
  @tree.insert_element(name, attributes)
  formatting = @tree.activeFormattingElements
  formatting.push(Marker)
end
226
+
227
# <xmp>: reconstruct the active formatting elements, insert the element
# and set the tokenizer's content model flag to :CDATA.
def startTagXmp(name, attributes)
  @tree.reconstructActiveFormattingElements
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :CDATA
end
232
+
233
# <table>: close an in-scope <p> through the regular end-tag dispatch,
# insert the table and switch the parser to the :inTable phase.
def startTagTable(name, attributes)
  if in_scope?('p')
    processEndTag('p')
  end
  @tree.insert_element(name, attributes)
  @parser.phase = @parser.phases[:inTable]
end
238
+
239
# Void elements (area, br, img, ...): reconstruct the active formatting
# elements, insert the element and pop it straight off the stack again.
def startTagVoidFormatting(name, attributes)
  @tree.reconstructActiveFormattingElements
  @tree.insert_element(name, attributes)
  stack = @tree.open_elements
  stack.pop
end
244
+
245
# <hr>: close an in-scope <p>, insert the element and pop it straight off
# the stack of open elements again.
def startTagHr(name, attributes)
  if in_scope?('p')
    endTagP('p')
  end
  @tree.insert_element(name, attributes)
  stack = @tree.open_elements
  stack.pop
end
250
+
251
# <image> is reported as a parse error and then handled as if it had been
# an <img> start tag with the same attributes. No really...
def startTagImage(name, attributes)
  details = {"originalName" => "image", "newName" => "img"}
  parse_error("unexpected-start-tag-treated-as", details)
  processStartTag('img', attributes)
end
256
+
257
# <input>: reconstruct the active formatting elements, insert the element
# and pop it straight off the stack of open elements again.
def startTagInput(name, attributes)
  @tree.reconstructActiveFormattingElements
  @tree.insert_element(name, attributes)
  if @tree.formPointer
    # XXX Not exactly sure what to do here: associating the new element
    # with the form (open_elements.last.form = formPointer) is not done.
  end
  stack = @tree.open_elements
  stack.pop
end
266
+
267
# <isindex> start tag (deprecated, always a parse error).
#
# Ignored while the tree already has a form pointer. Otherwise it is
# expanded into the equivalent token sequence:
#   <form><hr><p><label>prompt text<input name="isindex" ...></label></p><hr></form>
# The <input> receives the isindex tag's attributes with 'name' forced to
# 'isindex'.
def startTagIsindex(name, attributes)
  parse_error("deprecated-tag", {"name" => "isindex"})
  return if @tree.formPointer

  processStartTag('form', {})
  processStartTag('hr', {})
  processStartTag('p', {})
  processStartTag('label', {})
  # XXX Localization ...
  processCharacters('This is a searchable index. Insert your search keywords here: ')
  # Work on a copy so the caller's attribute hash is left untouched.
  input_attributes = attributes.dup
  input_attributes['name'] = 'isindex'
  processStartTag('input', input_attributes)
  processEndTag('label')
  processEndTag('p')
  processStartTag('hr', {})
  processEndTag('form')
end
284
+
285
# <textarea>: insert the element, set the tokenizer's content model flag to
# :RCDATA and swap in the newline-dropping whitespace handler on this
# instance.
def startTagTextarea(name, attributes)
  # XXX Form element pointer checking here as well...
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :RCDATA
  class << self
    begin
      remove_method :processSpaceCharacters
    rescue StandardError
      nil # nothing to remove on this instance; keep going (best effort)
    end
    alias processSpaceCharacters processSpaceCharactersDropNewline
  end
end
294
+
295
+ # iframe, noembed noframes, noscript(if scripting enabled)
296
# Registered for iframe, noembed, noframes and noscript: insert the element
# and set the tokenizer's content model flag to :CDATA.
def startTagCdata(name, attributes)
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :CDATA
end
300
+
301
# <select>: reconstruct the active formatting elements and insert the
# element, then switch to :inSelectInTable when the parser's current phase
# is one of the table-related phases, and to :inSelect otherwise.
def startTagSelect(name, attributes)
  @tree.reconstructActiveFormattingElements
  @tree.insert_element(name, attributes)

  table_phase_keys = [:inTable, :inCaption, :inColumnGroup, :inTableBody, :inRow, :inCell]
  table_phases = table_phase_keys.collect { |key| @parser.phases[key] }
  target = table_phases.include?(@parser.phase) ? :inSelectInTable : :inSelect
  @parser.phase = @parser.phases[target]
end
313
+
314
# Start tags that belong to other insertion modes (caption, col, colgroup,
# frame, frameset, head, tbody, td, tfoot, th, thead, tr) are ignored here
# apart from reporting a parse error.
def startTagMisplaced(name, attributes)
  details = {"name" => name}
  parse_error("unexpected-start-tag-ignored", details)
end
322
+
323
# <option>/<optgroup>: close an in-scope <option> first, then reconstruct
# the active formatting elements and insert the element.
def startTagOptionOptgroup(name, attributes)
  endTagOther('option') if in_scope?('option')
  @tree.reconstructActiveFormattingElements
  @tree.insert_element(name, attributes)
end
330
+
331
# New HTML5 elements ("event-source", "section", "nav", "article", "aside",
# "header", "footer", "datagrid", "command") have no dedicated handling
# yet; they are treated like any other start tag.
def startTagNew(name, attributes)
  startTagOther(name, attributes)
end
338
+
339
# Any other start tag: reconstruct the active formatting elements and
# insert the element.
def startTagOther(name, attributes)
  @tree.reconstructActiveFormattingElements()
  @tree.insert_element(name, attributes)
end
343
+
344
# <math> start tag: reconstruct the active formatting elements, adjust the
# attributes, insert the element as foreign content with the :math
# namespace argument, remember the current phase as the parser's secondary
# phase and switch to :inForeignContent.
def startTagForeignContent(name, attributes)
  @tree.reconstructActiveFormattingElements
  attributes = adjust_mathml_attributes(attributes)
  attributes = adjust_foreign_attributes(attributes)
  @tree.insert_foreign_element(name, attributes, :math)
  # TODO: the self-closing case is not handled. If the token has its
  # self-closing flag set, the current node should be popped off the stack
  # of open elements and the flag acknowledged instead of switching phases.
  @parser.secondary_phase = @parser.phase
  @parser.phase = @parser.phases[:inForeignContent]
end
357
+
358
# </p> end tag. A parse error is reported unless the current node is a <p>.
# With a <p> in scope, open elements are popped until none is in scope any
# more; without one, an empty <p> is opened and then closed again.
def endTagP(name)
  @tree.generateImpliedEndTags('p') if in_scope?('p')
  unless @tree.open_elements.last.name == 'p'
    parse_error("unexpected-end-tag", {"name" => "p"})
  end

  unless in_scope?('p')
    startTagCloseP('p', {})
    return endTagP('p')
  end

  @tree.open_elements.pop while in_scope?('p')
end
368
+
369
# </body> end tag.
#
# When the second entry on the stack of open elements is not the body
# element (the inner_html case) the tag is reported and ignored. Otherwise
# a parse error is reported if the current node is not the body element,
# and the parser moves to the :afterBody phase.
def endTagBody(name)
  # XXX Need to take open <p> tags into account here. We shouldn't imply
  # </p> but we should not throw a parse error either. Specification is
  # likely to be updated.
  body = @tree.open_elements[1]
  unless body && body.name == 'body'
    # inner_html case; error data uses string keys like every other
    # parse_error call in this phase
    parse_error("unexpected-end-tag", {"name" => "body"})
    return
  end

  current = @tree.open_elements.last
  unless current.name == 'body'
    parse_error("expected-one-end-tag-but-got-another",
      {"expectedName" => "body", "gotName" => current.name})
  end
  @parser.phase = @parser.phases[:afterBody]
end
385
+
386
# </html> is handled as </body>; unless the parser is in the inner_html
# case the end tag is then reprocessed by the parser's current phase.
def endTagHtml(name)
  endTagBody(name)
  return if @parser.inner_html

  @parser.phase.processEndTag(name)
end
390
+
391
# End tag of a block element (address, blockquote, div, ...): generate
# implied end tags when the element is in scope, report a parse error if it
# is not the current node, and remove open elements up to it when it is in
# scope.
def endTagBlock(name)
  @tree.generateImpliedEndTags if in_scope?(name)

  current = @tree.open_elements.last
  parse_error("end-tag-too-early", {"name" => name}) unless current.name == name

  remove_open_elements_until(name) if in_scope?(name)
end
402
+
403
# </form> end tag: always clears the tree's form pointer. With the element
# in scope, implied end tags are generated, a parse error is reported if it
# is not the current node, and open elements are popped up to and including
# it.
def endTagForm(name)
  @tree.formPointer = nil
  # NOTE(review): the original marks the not-in-scope case with a
  # "parse error" comment, but nothing is reported for it; the tag is
  # silently ignored.
  return unless in_scope?(name)

  @tree.generateImpliedEndTags
  unless @tree.open_elements.last.name == name
    parse_error("end-tag-too-early-ignored", {"name" => "form"})
  end
  loop do
    break if name == @tree.open_elements.pop.name
  end
end
414
+
415
# End tag for dd, dt and li. Same shape as the block case, except that
# `name` is passed to generateImpliedEndTags.
# AT Could merge this with the Block case
def endTagListItem(name)
  @tree.generateImpliedEndTags(name) if in_scope?(name)

  current = @tree.open_elements.last
  parse_error("end-tag-too-early", {"name" => name}) unless current.name == name

  if in_scope?(name)
    remove_open_elements_until(name)
  end
end
425
+
426
# End tag of a heading. If any heading element is in scope, implied end
# tags are generated; a parse error is reported when the current node is
# not `name`; then, again if any heading is in scope, open elements are
# removed until a heading element (of any level) has been removed.
def endTagHeading(name)
  HEADING_ELEMENTS.each do |heading|
    next unless in_scope?(heading)
    @tree.generateImpliedEndTags
    break
  end

  current = @tree.open_elements.last
  parse_error("end-tag-too-early", {"name" => name}) unless current.name == name

  HEADING_ELEMENTS.each do |heading|
    next unless in_scope?(heading)
    remove_open_elements_until { |el| HEADING_ELEMENTS.include?(el.name) }
    break
  end
end
445
+
446
+ # The much-feared adoption agency algorithm
447
# The much-feared adoption agency algorithm
# http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
#
# Handles the end tag of a formatting element (a, b, i, ...). It loops
# until the end tag has been dealt with: either the formatting element can
# simply be popped (no special or scoping element was opened after it), or
# the element is split around the "furthest block" by moving nodes and
# inserting a clone of the formatting element, after which the loop runs
# again.
# XXX Better parse_error messages appreciated.
def endTagFormatting(name)
  while true
    # Step 1 paragraph 1
    formatting_element = @tree.elementInActiveFormattingElements(name)
    if !formatting_element ||
       (@tree.open_elements.include?(formatting_element) && !in_scope?(formatting_element.name))
      parse_error("adoption-agency-1.1", {"name" => name})
      return
    # Step 1 paragraph 2
    elsif !@tree.open_elements.include?(formatting_element)
      parse_error("adoption-agency-1.2", {"name" => name})
      @tree.activeFormattingElements.delete(formatting_element)
      return
    end

    # Step 1 paragraph 3
    if formatting_element != @tree.open_elements.last
      parse_error("adoption-agency-1.3", {"name" => name})
    end

    # Step 2
    # Start of the adoption agency algorithm proper: the furthest block is
    # the first special or scoping element at or after the formatting
    # element on the stack of open elements.
    afe_index = @tree.open_elements.index(formatting_element)
    block_names = SPECIAL_ELEMENTS + SCOPING_ELEMENTS
    furthest_block = @tree.open_elements[afe_index..-1].find do |element|
      block_names.include?(element.name)
    end

    # Step 3
    if furthest_block.nil?
      element = remove_open_elements_until { |el| el == formatting_element }
      @tree.activeFormattingElements.delete(element)
      return
    end
    common_ancestor = @tree.open_elements[afe_index - 1]

    # Step 5
    furthest_block.parent.removeChild(furthest_block) if furthest_block.parent

    # Step 6
    # The bookmark is supposed to help us identify where to reinsert
    # nodes in step 12. We have to ensure that we reinsert nodes after
    # the node before the active formatting element. Note the bookmark
    # can move in step 7.4
    bookmark = @tree.activeFormattingElements.index(formatting_element)

    # Step 7
    last_node = node = furthest_block
    while true
      # AT replace this with a function and recursion?
      # Node is element before node in open elements
      node = @tree.open_elements[@tree.open_elements.index(node) - 1]
      until @tree.activeFormattingElements.include?(node)
        skipped = node
        node = @tree.open_elements[@tree.open_elements.index(node) - 1]
        @tree.open_elements.delete(skipped)
      end
      # Step 7.3
      break if node == formatting_element
      # Step 7.4
      if last_node == furthest_block
        # XXX should this be index(node) or index(node)+1
        # Anne: I think +1 is ok. Given x = [2,3,4,5]
        # x.index(3) gives 1 and then x[1 +1] gives 4...
        bookmark = @tree.activeFormattingElements.index(node) + 1
      end
      # Step 7.5
      if node.hasContent
        clone = node.cloneNode
        # Replace node with clone
        @tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = clone
        @tree.open_elements[@tree.open_elements.index(node)] = clone
        node = clone
      end
      # Step 7.6
      # Remove last_node from its parents, if any
      last_node.parent.removeChild(last_node) if last_node.parent
      node.appendChild(last_node)
      # Step 7.7
      last_node = node
      # End of inner loop
    end

    # Step 8
    last_node.parent.removeChild(last_node) if last_node.parent
    common_ancestor.appendChild(last_node)

    # Step 9
    clone = formatting_element.cloneNode

    # Step 10
    furthest_block.reparentChildren(clone)

    # Step 11
    furthest_block.appendChild(clone)

    # Step 12
    @tree.activeFormattingElements.delete(formatting_element)
    @tree.activeFormattingElements.insert([bookmark, @tree.activeFormattingElements.length].min, clone)

    # Step 13
    @tree.open_elements.delete(formatting_element)
    @tree.open_elements.insert(@tree.open_elements.index(furthest_block) + 1, clone)
  end
end
557
+
558
# End tag for applet, button, marquee and object: like the block case, and
# additionally calls clearActiveFormattingElements after the element has
# been removed.
def endTagAppletButtonMarqueeObject(name)
  @tree.generateImpliedEndTags if in_scope?(name)

  current = @tree.open_elements.last
  parse_error("end-tag-too-early", {"name" => name}) unless current.name == name

  return unless in_scope?(name)

  remove_open_elements_until(name)
  @tree.clearActiveFormattingElements
end
571
+
572
# End tags that are handled in other insertion modes are only reported as
# a parse error here.
def endTagMisplaced(name)
  details = {"name" => name}
  parse_error("unexpected-end-tag", details)
end
576
+
577
# </br> is a parse error and is treated as a <br> element: the element is
# inserted without attributes and immediately popped off the stack.
def endTagBr(name)
  details = {"originalName" => "br", "newName" => "br element"}
  parse_error("unexpected-end-tag-treated-as", details)
  @tree.reconstructActiveFormattingElements
  @tree.insert_element(name, {})
  @tree.open_elements.pop
end
584
+
585
# Elements with no end tag (area, img, input, ...): an end tag for one of
# them is only reported as a parse error.
def endTagNone(name)
  details = {"name" => name}
  parse_error("no-end-tag", details)
end
589
+
590
# End tag for noframes, noscript, noembed, textarea, xmp and iframe: pops
# the current node when it is that element, otherwise reports a parse
# error.
def endTagCdataTextAreaXmp(name)
  stack = @tree.open_elements
  return stack.pop if stack.last.name == name

  parse_error("unexpected-end-tag", {"name" => name})
end
597
+
598
# New HTML5 elements ("event-source", "section", "nav", "article", "aside",
# "header", "footer", "datagrid", "command") have no dedicated handling
# yet; their end tags are treated like any other end tag.
def endTagNew(name)
  endTagOther(name)
end
605
+
606
# Any other end tag. Walks the stack of open elements from the top: on the
# first element with a matching name, implied end tags are generated, a
# parse error is reported if that element is not the current node, and open
# elements are removed up to and including it. Reaching a special or
# scoping element first makes the tag a parse error and stops the walk.
# XXX This logic should be moved into the treebuilder
def endTagOther(name)
  @tree.open_elements.reverse.each do |node|
    unless node.name == name
      next unless (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name)
      parse_error("unexpected-end-tag", {"name" => name})
      break
    end

    @tree.generateImpliedEndTags
    parse_error("unexpected-end-tag", {"name" => name}) unless @tree.open_elements.last.name == name
    remove_open_elements_until { |element| element == node }
    break
  end
end
627
+
628
+ protected
629
+
630
# Inserts the element and appends the resulting current node to the list of
# active formatting elements.
def addFormattingElement(name, attributes)
  @tree.insert_element(name, attributes)
  inserted = @tree.open_elements.last
  @tree.activeFormattingElements.push(inserted)
end
634
+
635
+ end
636
+ end