spk-html5 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. data/History.txt +10 -0
  2. data/Manifest.txt +73 -0
  3. data/README +45 -0
  4. data/Rakefile.rb +33 -0
  5. data/bin/html5 +7 -0
  6. data/lib/html5.rb +13 -0
  7. data/lib/html5/cli.rb +248 -0
  8. data/lib/html5/constants.rb +1061 -0
  9. data/lib/html5/filters/base.rb +10 -0
  10. data/lib/html5/filters/inject_meta_charset.rb +82 -0
  11. data/lib/html5/filters/iso639codes.rb +755 -0
  12. data/lib/html5/filters/optionaltags.rb +198 -0
  13. data/lib/html5/filters/rfc2046.rb +31 -0
  14. data/lib/html5/filters/rfc3987.rb +91 -0
  15. data/lib/html5/filters/sanitizer.rb +15 -0
  16. data/lib/html5/filters/validator.rb +834 -0
  17. data/lib/html5/filters/whitespace.rb +36 -0
  18. data/lib/html5/html5parser.rb +247 -0
  19. data/lib/html5/html5parser/after_after_body_phase.rb +43 -0
  20. data/lib/html5/html5parser/after_after_frameset_phase.rb +32 -0
  21. data/lib/html5/html5parser/after_body_phase.rb +46 -0
  22. data/lib/html5/html5parser/after_frameset_phase.rb +33 -0
  23. data/lib/html5/html5parser/after_head_phase.rb +55 -0
  24. data/lib/html5/html5parser/before_head_phase.rb +44 -0
  25. data/lib/html5/html5parser/before_html_phase.rb +41 -0
  26. data/lib/html5/html5parser/in_body_phase.rb +636 -0
  27. data/lib/html5/html5parser/in_caption_phase.rb +69 -0
  28. data/lib/html5/html5parser/in_cell_phase.rb +78 -0
  29. data/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  30. data/lib/html5/html5parser/in_foreign_content_phase.rb +50 -0
  31. data/lib/html5/html5parser/in_frameset_phase.rb +56 -0
  32. data/lib/html5/html5parser/in_head_phase.rb +143 -0
  33. data/lib/html5/html5parser/in_row_phase.rb +96 -0
  34. data/lib/html5/html5parser/in_select_phase.rb +90 -0
  35. data/lib/html5/html5parser/in_select_table_phase.rb +35 -0
  36. data/lib/html5/html5parser/in_table_body_phase.rb +92 -0
  37. data/lib/html5/html5parser/in_table_phase.rb +177 -0
  38. data/lib/html5/html5parser/initial_phase.rb +133 -0
  39. data/lib/html5/html5parser/phase.rb +171 -0
  40. data/lib/html5/inputstream.rb +735 -0
  41. data/lib/html5/liberalxmlparser.rb +158 -0
  42. data/lib/html5/sanitizer.rb +209 -0
  43. data/lib/html5/serializer.rb +2 -0
  44. data/lib/html5/serializer/htmlserializer.rb +179 -0
  45. data/lib/html5/serializer/xhtmlserializer.rb +20 -0
  46. data/lib/html5/sniffer.rb +45 -0
  47. data/lib/html5/tokenizer.rb +1059 -0
  48. data/lib/html5/treebuilders.rb +24 -0
  49. data/lib/html5/treebuilders/base.rb +339 -0
  50. data/lib/html5/treebuilders/hpricot.rb +231 -0
  51. data/lib/html5/treebuilders/rexml.rb +215 -0
  52. data/lib/html5/treebuilders/simpletree.rb +191 -0
  53. data/lib/html5/treewalkers.rb +26 -0
  54. data/lib/html5/treewalkers/base.rb +162 -0
  55. data/lib/html5/treewalkers/hpricot.rb +48 -0
  56. data/lib/html5/treewalkers/rexml.rb +48 -0
  57. data/lib/html5/treewalkers/simpletree.rb +48 -0
  58. data/lib/html5/version.rb +3 -0
  59. data/test/preamble.rb +69 -0
  60. data/test/test_cli.rb +16 -0
  61. data/test/test_encoding.rb +35 -0
  62. data/test/test_input_stream.rb +26 -0
  63. data/test/test_lxp.rb +283 -0
  64. data/test/test_parser.rb +63 -0
  65. data/test/test_sanitizer.rb +173 -0
  66. data/test/test_serializer.rb +67 -0
  67. data/test/test_sniffer.rb +27 -0
  68. data/test/test_stream.rb +71 -0
  69. data/test/test_tokenizer.rb +95 -0
  70. data/test/test_treewalkers.rb +135 -0
  71. data/test/test_validator.rb +31 -0
  72. data/test/tokenizer_test_parser.rb +67 -0
  73. data/test19.rb +38 -0
  74. metadata +198 -0
@@ -0,0 +1,44 @@
1
+ require 'html5/html5parser/phase'
2
+
3
+ module HTML5
4
# Tree-construction phase that is active until a <head> element exists.
#
# Whitespace is dropped and unknown end tags are reported as parse
# errors; every other token first inserts a <head> element (which also
# switches the parser to the :inHead phase) and is then replayed on
# whatever phase the parser is in afterwards.
class BeforeHeadPhase < Phase

  handle_start 'html', 'head'

  handle_end %w( head br ) => 'ImplyHead'

  # End of input: create the implied <head>, then let the next phase
  # handle EOF.
  def process_eof
    imply_head_then { |phase| phase.process_eof }
  end

  # Whitespace is ignored in this phase.
  def processSpaceCharacters(data)
  end

  # Non-space text implies <head>; the text is replayed afterwards.
  def processCharacters(data)
    imply_head_then { |phase| phase.processCharacters(data) }
  end

  # Inserts the element, records it as the tree's head pointer and
  # switches the parser to the :inHead phase.
  def startTagHead(name, attributes)
    @tree.insert_element(name, attributes)
    @tree.head_pointer = @tree.open_elements.last
    @parser.phase = @parser.phases[:inHead]
  end

  # Any other start tag implies <head> and is then replayed.
  def startTagOther(name, attributes)
    imply_head_then { |phase| phase.processStartTag(name, attributes) }
  end

  # </head> and </br> imply <head> and are then replayed.
  def endTagImplyHead(name)
    imply_head_then { |phase| phase.processEndTag(name) }
  end

  # Every other end tag is only reported as a parse error.
  def endTagOther(name)
    parse_error("end-tag-after-implied-root", {"name" => name})
  end

  private

  # Inserts an attribute-less <head> and yields the parser's current
  # phase (as set by startTagHead) so the caller can replay its token.
  def imply_head_then
    startTagHead('head', {})
    yield @parser.phase
  end

end
44
+ end
@@ -0,0 +1,41 @@
1
+ require 'html5/html5parser/phase'
2
+
3
+ module HTML5
4
# Tree-construction phase that is active until the root <html> element
# exists.
#
# Comments are attached to the document and whitespace is dropped; any
# other token first creates the <html> element (which switches the
# parser to the :beforeHead phase) and is then replayed on the parser's
# new phase.
class BeforeHtmlPhase < Phase

  # End of input: create <html>, then let the next phase handle EOF.
  def process_eof
    with_implied_html { |phase| phase.process_eof }
  end

  # Comments seen before <html> become children of the document.
  def processComment(data)
    @tree.insert_comment(data, @tree.document)
  end

  # Whitespace is ignored in this phase.
  def processSpaceCharacters(data)
  end

  def processCharacters(data)
    with_implied_html { |phase| phase.processCharacters(data) }
  end

  # Flags the parser when the very first start tag is <html>, then
  # replays the tag. NOTE(review): self_closing is accepted but not
  # forwarded to the next phase, exactly as before.
  def processStartTag(name, attributes, self_closing=false)
    @parser.first_start_tag = true if name == 'html'
    with_implied_html { |phase| phase.processStartTag(name, attributes) }
  end

  def processEndTag(name)
    with_implied_html { |phase| phase.processEndTag(name) }
  end

  # Creates an attribute-less <html> element, pushes it on the stack of
  # open elements, appends it to the document and switches the parser to
  # the :beforeHead phase.
  def insert_html_element
    html = @tree.createElement('html', {})
    @tree.open_elements.push(html)
    @tree.document.appendChild(html)
    @parser.phase = @parser.phases[:beforeHead]
  end

  private

  # Creates the root element and yields the parser's current phase so
  # the caller can replay its token there.
  def with_implied_html
    insert_html_element
    yield @parser.phase
  end
end
41
+ end
@@ -0,0 +1,636 @@
1
+ require 'html5/html5parser/phase'
2
+
3
+ module HTML5
4
+ class InBodyPhase < Phase
5
+
6
+ # http://www.whatwg.org/specs/web-apps/current-work/#in-body
7
+
8
+ handle_start 'html'
9
+ handle_start %w(base link meta script style title) => 'ProcessInHead'
10
+
11
+ handle_start 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'
12
+
13
+ handle_start 'input', 'textarea', 'select', 'isindex', %w(applet marquee object)
14
+
15
+ handle_start %w(li dd dt) => 'ListItem'
16
+
17
+ handle_start %w(address blockquote center dir div dl fieldset listing menu ol p pre ul) => 'CloseP'
18
+
19
+ handle_start %w(b big em font i s small strike strong tt u) => 'Formatting'
20
+ handle_start 'nobr'
21
+
22
+ handle_start %w(area basefont bgsound br embed img param spacer wbr) => 'VoidFormatting'
23
+
24
+ handle_start %w(iframe noembed noframes noscript) => 'Cdata', HEADING_ELEMENTS => 'Heading'
25
+
26
+ handle_start %w(caption col colgroup frame frameset head tbody td tfoot th thead tr) => 'Misplaced'
27
+
28
+ handle_start %w(option optgroup)
29
+
30
+ handle_start %w(event-source section nav article aside header footer datagrid command) => 'New'
31
+
32
+ handle_start %w[math] => 'ForeignContent'
33
+
34
+ handle_end 'p', 'body', 'html', 'form', %w(applet button marquee object), %w(dd dt li) => 'ListItem'
35
+
36
+ handle_end %w(address blockquote center div dl fieldset listing menu ol pre ul) => 'Block'
37
+
38
+ handle_end HEADING_ELEMENTS => 'Heading'
39
+
40
+ handle_end %w(a b big em font i nobr s small strike strong tt u) => 'Formatting'
41
+
42
+ handle_end %w(head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th) => 'Misplaced'
43
+
44
+ handle_end 'br'
45
+
46
+ handle_end %w(area basefont bgsound embed hr image img input isindex param spacer wbr frame) => 'None'
47
+
48
+ handle_end %w(noframes noscript noembed textarea xmp iframe ) => 'CdataTextAreaXmp'
49
+
50
+ handle_end %w(event-source section nav article aside header footer datagrid command) => 'New'
51
+
52
# Sets up the phase and keeps the regular whitespace handler reachable
# under the name processSpaceCharactersNonPre, so that the temporary
# newline-dropping handler used for <pre>-like elements can switch back.
def initialize(parser, tree)
  super

  # Alias on this object's singleton class (special handling of
  # whitespace in <pre>).
  (class << self; self; end).send(:alias_method,
                                  :processSpaceCharactersNonPre,
                                  :processSpaceCharacters)
end
60
+
61
# One-shot whitespace handler used right after the start of <pre>,
# <listing> and <textarea>: it re-installs the normal handler and then
# inserts +data+, minus a leading newline when the current node is one
# of those elements and has no content yet.
def processSpaceCharactersDropNewline(data)
  # Switch back to the regular whitespace handler for later tokens.
  class << self
    remove_method :processSpaceCharacters rescue nil
    alias processSpaceCharacters processSpaceCharactersNonPre
  end

  drop_first = data.length > 0 && data[0] == ?\n && begin
    current = @tree.open_elements.last
    %w[listing pre textarea].include?(current.name) && !@tree.open_elements.last.hasContent
  end
  data = data[1..-1] if drop_first

  unless data.length == 0
    @tree.reconstructActiveFormattingElements
    @tree.insertText(data)
  end
end
79
+
80
# Default whitespace handler: reconstructs the active formatting
# elements and inserts the whitespace as text.
def processSpaceCharacters(data)
  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insertText(data)
end
84
+
85
# Inserts non-space character data as text after reconstructing the
# active formatting elements.
#
# XXX The specification says to do this for every character at the
# moment, but apparently that doesn't match the real world so we don't
# do it for space characters.
def processCharacters(data)
  @tree.reconstructActiveFormattingElements()
  @tree.insertText(data)
end
92
+
93
# Hands the start tag to the :inHead phase without changing the
# parser's current phase.
def startTagProcessInHead(name, attributes)
  in_head = @parser.phases[:inHead]
  in_head.processStartTag(name, attributes)
end
96
+
97
# A second <body> start tag is always a parse error. When the second
# open element is a <body>, attributes it does not have yet are copied
# onto it; otherwise the parser is asserted to be in inner_html mode.
def startTagBody(name, attributes)
  parse_error("unexpected-start-tag", {"name" => "body"})

  stack = @tree.open_elements
  if stack.length == 1 || stack[1].name != 'body'
    assert @parser.inner_html
  else
    attributes.each do |attr, value|
      existing = @tree.open_elements[1].attributes
      existing[attr] = value unless existing.has_key?(attr)
    end
  end
end
110
+
111
# Start tag of a block-level element: ends a <p> that is in scope,
# inserts the element and, for <pre> and <listing>, installs the
# one-shot whitespace handler that drops a leading newline.
def startTagCloseP(name, attributes)
  if in_scope?('p')
    endTagP('p')
  end
  @tree.insert_element(name, attributes)
  return unless %w[pre listing].include?(name)

  class << self
    remove_method :processSpaceCharacters rescue nil
    alias processSpaceCharacters processSpaceCharactersDropNewline
  end
end
121
+
122
# <form> start tag: a parse error while the tree already has a form
# pointer; otherwise ends a <p> in scope, inserts the element and makes
# it the form pointer.
def startTagForm(name, attributes)
  return parse_error("unexpected-start-tag", {"name" => name}) if @tree.formPointer

  endTagP('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  @tree.formPointer = @tree.open_elements.last
end
131
+
132
# Start tag for <li>, <dd> or <dt>.
#
# Ends a <p> in scope, then walks the stack of open elements from the
# top looking for an open list item of the matching kind (<li> for
# <li>; <dd> or <dt> for either of those). If one is found, it and
# everything above it are popped, and the elements above it that were
# closed implicitly are reported as missing end tags. The walk stops
# early at any special or scoping element other than <address> and
# <div>. The new element is always inserted.
def startTagListItem(name, attributes)
  endTagP('p') if in_scope?('p')
  stop_names = {'li' => ['li'], 'dd' => ['dd', 'dt'], 'dt' => ['dd', 'dt']}[name]
  # Loop-invariant, so build the barrier list once.
  barrier_names = SPECIAL_ELEMENTS + SCOPING_ELEMENTS

  @tree.open_elements.reverse.each_with_index do |node, depth|
    if stop_names.include?(node.name)
      popped = (0..depth).collect { @tree.open_elements.pop }
      if depth >= 1
        # The last popped node is the matched list item itself; only the
        # nodes above it were missing their end tags.
        unclosed = popped[0...-1].collect { |n| n.name }.join(", ")
        parse_error(depth == 1 ? "missing-end-tag" : "missing-end-tags",
                    {"name" => unclosed})
      end
      break
    end

    # Phrasing elements are all non special, non scoping, non
    # formatting elements
    break if barrier_names.include?(node.name) && !%w[address div].include?(node.name)
  end

  # Always insert an <li> element.
  @tree.insert_element(name, attributes)
end
157
+
158
# <plaintext>: ends a <p> in scope, inserts the element and switches the
# tokenizer's content model to :PLAINTEXT.
def startTagPlaintext(name, attributes)
  if in_scope?('p')
    endTagP('p')
  end
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :PLAINTEXT
end
163
+
164
# Heading start tag: ends a <p> in scope and inserts the heading.
def startTagHeading(name, attributes)
  if in_scope?('p')
    endTagP('p')
  end

  # Uncomment the following for IE7 behavior:
  # HEADING_ELEMENTS.each do |element|
  #   if in_scope?(element)
  #     parse_error("unexpected-start-tag", {"name" => name})
  #
  #     remove_open_elements_until do |element|
  #       HEADING_ELEMENTS.include?(element.name)
  #     end
  #
  #     break
  #   end
  # end
  @tree.insert_element(name, attributes)
end
181
+
182
# <a> start tag. If an <a> is still in the list of active formatting
# elements, that is a parse error: the old one is closed as if </a> had
# been seen and then removed from both the stack of open elements and
# the active formatting list if it is still there. The new <a> is then
# added as a formatting element.
def startTagA(name, attributes)
  stale_anchor = @tree.elementInActiveFormattingElements('a')
  if stale_anchor
    parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "a", "endName" => "a"})
    endTagFormatting('a')
    if @tree.open_elements.include?(stale_anchor)
      @tree.open_elements.delete(stale_anchor)
    end
    if @tree.activeFormattingElements.include?(stale_anchor)
      @tree.activeFormattingElements.delete(stale_anchor)
    end
  end
  @tree.reconstructActiveFormattingElements
  addFormattingElement(name, attributes)
end
192
+
193
# Start tag of a formatting element (b, i, em, ...): reconstructs the
# active formatting elements, then inserts the element and records it
# as active.
def startTagFormatting(name, attributes)
  @tree.reconstructActiveFormattingElements()
  addFormattingElement(name, attributes)
end
197
+
198
# <nobr> start tag: a <nobr> already in scope is a parse error and is
# closed first; the new element is then added as a formatting element.
def startTagNobr(name, attributes)
  @tree.reconstructActiveFormattingElements
  nested = in_scope?('nobr')
  if nested
    parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "nobr", "endName" => "nobr"})
    processEndTag('nobr')
    # XXX Need tests that trigger the following
    @tree.reconstructActiveFormattingElements
  end
  addFormattingElement(name, attributes)
end
208
+
209
# <button> start tag. With a <button> already in scope this is a parse
# error: the open button is closed and the start tag is replayed on the
# parser's current phase. Otherwise the element is inserted and a
# Marker is pushed onto the active formatting elements.
def startTagButton(name, attributes)
  if in_scope?('button')
    parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "button", "endName" => "button"})
    processEndTag('button')
    current_phase = @parser.phase
    current_phase.processStartTag(name, attributes)
  else
    @tree.reconstructActiveFormattingElements
    @tree.insert_element(name, attributes)
    @tree.activeFormattingElements.push(Marker)
  end
end
220
+
221
# <applet>, <marquee>, <object>: inserts the element and pushes a Marker
# onto the list of active formatting elements.
def startTagAppletMarqueeObject(name, attributes)
  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insert_element(name, attributes)
  tree.activeFormattingElements.push(Marker)
end
226
+
227
# <xmp>: inserts the element and switches the tokenizer's content model
# to :CDATA.
def startTagXmp(name, attributes)
  @tree.reconstructActiveFormattingElements()
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :CDATA
end
232
+
233
# <table>: closes a <p> in scope (through the regular end-tag
# dispatch), inserts the element and switches to the :inTable phase.
def startTagTable(name, attributes)
  if in_scope?('p')
    processEndTag('p')
  end
  @tree.insert_element(name, attributes)
  in_table = @parser.phases[:inTable]
  @parser.phase = in_table
end
238
+
239
# Void elements (area, br, img, ...): the element is inserted and
# immediately popped off the stack of open elements again.
def startTagVoidFormatting(name, attributes)
  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insert_element(name, attributes)
  tree.open_elements.pop
end
244
+
245
# <hr>: ends a <p> in scope, inserts the element and pops it right away.
def startTagHr(name, attributes)
  if in_scope?('p')
    endTagP('p')
  end
  @tree.insert_element(name, attributes)
  @tree.open_elements.pop()
end
250
+
251
# <image> is reported as a parse error and then handled as if it had
# been <img>. No really...
def startTagImage(name, attributes)
  renamed = {"originalName" => "image", "newName" => "img"}
  parse_error("unexpected-start-tag-treated-as", renamed)
  processStartTag('img', attributes)
end
256
+
257
# <input>: inserted and immediately popped. Association with the
# current form is not implemented (see the XXX note below).
def startTagInput(name, attributes)
  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insert_element(name, attributes)
  if tree.formPointer
    # XXX Not exactly sure what to do here
    # @tree.open_elements[-1].form = @tree.formPointer
  end
  tree.open_elements.pop
end
266
+
267
# <isindex> is deprecated: a parse error is always reported. Unless a
# form pointer is already set, the tag is expanded into the token
# sequence
#   <form><hr><p><label>prompt text<input name="isindex" ...></label></p><hr></form>
# by feeding each token through the regular handlers.
#
# The <input> receives a copy of +attributes+ with "name" forced to
# "isindex"; the caller's hash is left untouched.
def startTagIsindex(name, attributes)
  parse_error("deprecated-tag", {"name" => "isindex"})
  return if @tree.formPointer

  processStartTag('form', {})
  processStartTag('hr', {})
  processStartTag('p', {})
  processStartTag('label', {})
  # XXX Localization ...
  processCharacters('This is a searchable index. Insert your search keywords here: ')
  processStartTag('input', attributes.merge('name' => 'isindex'))
  processEndTag('label')
  processEndTag('p')
  processStartTag('hr', {})
  processEndTag('form')
end
284
+
285
# <textarea>: inserts the element, switches the tokenizer's content
# model to :RCDATA and installs the one-shot whitespace handler that
# drops a leading newline.
def startTagTextarea(name, attributes)
  # XXX Form element pointer checking here as well...
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :RCDATA

  class << self
    remove_method :processSpaceCharacters rescue nil
    alias processSpaceCharacters processSpaceCharactersDropNewline
  end
end
294
+
295
# iframe, noembed noframes, noscript(if scripting enabled):
# inserts the element and switches the tokenizer's content model to
# :CDATA.
def startTagCdata(name, attributes)
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :CDATA
end
300
+
301
# <select>: inserts the element, then switches to :inSelectInTable when
# the parser's current phase is one of the table-related phases and to
# :inSelect otherwise.
def startTagSelect(name, attributes)
  @tree.reconstructActiveFormattingElements
  @tree.insert_element(name, attributes)

  table_modes = [:inTable, :inCaption, :inColumnGroup, :inTableBody, :inRow, :inCell]
  inside_table = table_modes.any? { |mode| @parser.phases[mode] == @parser.phase }
  @parser.phase = @parser.phases[inside_table ? :inSelectInTable : :inSelect]
end
313
+
314
# Start tags that belong to other insertion modes ("caption", "col",
# "colgroup", "frame", "frameset", "head", "tbody", "td", "tfoot",
# "th", "thead", "tr", "noscript") are ignored here apart from
# reporting a parse error.
def startTagMisplaced(name, attributes)
  details = {"name" => name}
  parse_error("unexpected-start-tag-ignored", details)
end
322
+
323
# <option>/<optgroup>: closes an <option> that is in scope, then
# inserts the new element.
def startTagOptionOptgroup(name, attributes)
  endTagOther('option') if in_scope?('option')
  @tree.reconstructActiveFormattingElements()
  @tree.insert_element(name, attributes)
end
330
+
331
# New HTML5 elements ("event-source", "section", "nav", "article",
# "aside", "header", "footer", "datagrid", "command") have no special
# handling yet and are treated like any other start tag.
def startTagNew(name, attributes)
  # $stderr.puts("Warning: Undefined behaviour for start tag #{name}")
  # raise NotImplementedError
  startTagOther(name, attributes)
end
338
+
339
# Fallback for start tags without a dedicated handler: reconstructs the
# active formatting elements and inserts the element.
def startTagOther(name, attributes)
  tree = @tree
  tree.reconstructActiveFormattingElements
  tree.insert_element(name, attributes)
end
343
+
344
# <math> start tag: inserts the element as foreign content in the
# :math namespace (after adjusting its attributes), remembers the
# current phase as the parser's secondary phase and switches to the
# :inForeignContent phase.
def startTagForeignContent(name, attributes)
  @tree.reconstructActiveFormattingElements
  attributes = adjust_foreign_attributes(adjust_mathml_attributes(attributes))
  @tree.insert_foreign_element(name, attributes, :math)

  # TODO: If the token has its self-closing flag set, pop the current
  # node off the stack of open elements and acknowledge the token's
  # self-closing flag instead of switching phases. The flag is not
  # passed to this handler, so that case is not handled yet.
  @parser.secondary_phase = @parser.phase
  @parser.phase = @parser.phases[:inForeignContent]
end
357
+
358
# </p>: generates implied end tags (except for p) when a <p> is in
# scope, reports a parse error unless the current node is a <p>, then
# pops elements until no <p> is in scope any more. With no <p> in scope
# at all, an empty <p> is opened first and then closed by re-running
# this handler.
def endTagP(name)
  @tree.generateImpliedEndTags('p') if in_scope?('p')

  if @tree.open_elements.last.name != 'p'
    parse_error("unexpected-end-tag", {"name" => "p"})
  end

  unless in_scope?('p')
    startTagCloseP('p', {})
    return endTagP('p')
  end

  @tree.open_elements.pop while in_scope?('p')
end
368
+
369
# </body>: switches the parser to the :afterBody phase.
#
# If the second open element is not a <body> (the inner_html case) the
# tag is reported as unexpected and otherwise ignored. If the current
# node is not the <body>, a parse error naming the element that is
# still open is reported before switching phases.
#
# XXX Need to take open <p> tags into account here. We shouldn't imply
# </p> but we should not throw a parse error either. Specification is
# likely to be updated.
def endTagBody(name)
  body = @tree.open_elements[1]
  unless body && body.name == 'body'
    # inner_html case. The data hash uses the string key "name", like
    # every other parse_error call in this phase.
    parse_error("unexpected-end-tag", {"name" => "body"})
    return
  end

  current_name = @tree.open_elements.last.name
  unless current_name == 'body'
    parse_error("expected-one-end-tag-but-got-another",
                {"expectedName" => "body", "gotName" => current_name})
  end
  @parser.phase = @parser.phases[:afterBody]
end
385
+
386
# </html>: handled like </body>, after which (outside inner_html mode)
# the end tag is replayed on the parser's current phase.
def endTagHtml(name)
  endTagBody(name)
  unless @parser.inner_html
    @parser.phase.processEndTag(name)
  end
end
390
+
391
# End tag of a block-level element: generates implied end tags when the
# element is in scope, reports a parse error unless it is the current
# node, then pops the stack up to it if it is in scope.
def endTagBlock(name)
  if in_scope?(name)
    @tree.generateImpliedEndTags
  end

  if @tree.open_elements.last.name != name
    parse_error("end-tag-too-early", {"name" => name})
  end

  remove_open_elements_until(name) if in_scope?(name)
end
402
+
403
# </form>: always clears the tree's form pointer.
#
# When no <form> is in scope the tag is reported as an unexpected end
# tag and ignored. Otherwise implied end tags are generated, a parse
# error is reported if the current node is not the form, and the stack
# of open elements is popped up to and including the form element.
def endTagForm(name)
  @tree.formPointer = nil

  unless in_scope?(name)
    parse_error("unexpected-end-tag", {"name" => name})
    return
  end

  @tree.generateImpliedEndTags
  if @tree.open_elements.last.name != name
    parse_error("end-tag-too-early-ignored", {"name" => "form"})
  end
  # Pop until the form element itself has been removed.
  nil until @tree.open_elements.pop.name == name
end
414
+
415
# </dd>, </dt>, </li>: like the block case, except that implied end
# tags are generated with +name+ passed as the exclusion.
# AT Could merge this with the Block case
def endTagListItem(name)
  if in_scope?(name)
    @tree.generateImpliedEndTags(name)
  end

  if @tree.open_elements.last.name != name
    parse_error("end-tag-too-early", {"name" => name})
  end

  if in_scope?(name)
    remove_open_elements_until(name)
  end
end
425
+
426
# Heading end tag: if any heading element is in scope, implied end tags
# are generated; a parse error is reported unless the current node has
# the given name; finally, if a heading is (still) in scope, the stack
# is popped up to the nearest heading element of any level.
def endTagHeading(name)
  heading_open = lambda { HEADING_ELEMENTS.any? { |heading| in_scope?(heading) } }

  @tree.generateImpliedEndTags if heading_open.call

  if @tree.open_elements.last.name != name
    parse_error("end-tag-too-early", {"name" => name})
  end

  if heading_open.call
    remove_open_elements_until { |el| HEADING_ELEMENTS.include?(el.name) }
  end
end
445
+
446
# The much-feared adoption agency algorithm
# http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
#
# Handles the end tag of a formatting element. The outer loop repeats
# the algorithm until one of the `return` statements in steps 1 and 3
# is reached. The step numbers in the comments refer to the
# specification linked above.
#
# XXX Better parse_error messages appreciated.
def endTagFormatting(name)
  while true
    # Step 1 paragraph 1: no such active formatting element, or it is
    # open but not in scope -> parse error, ignore the tag.
    afeElement = @tree.elementInActiveFormattingElements(name)
    if !afeElement || (@tree.open_elements.include?(afeElement) && !in_scope?(afeElement.name))
      parse_error("adoption-agency-1.1", {"name" => name})
      return
    # Step 1 paragraph 2: active but no longer on the stack of open
    # elements -> parse error, drop it from the active list.
    elsif !@tree.open_elements.include?(afeElement)
      parse_error("adoption-agency-1.2", {"name" => name})
      @tree.activeFormattingElements.delete(afeElement)
      return
    end

    # Step 1 paragraph 3: on the stack but not the current node.
    if afeElement != @tree.open_elements.last
      parse_error("adoption-agency-1.3", {"name" => name})
    end

    # Step 2
    # Start of the adoption agency algorithm proper: scanning from the
    # formatting element towards the top of the stack, take the first
    # special or scoping element as the furthest block.
    afeIndex = @tree.open_elements.index(afeElement)
    furthestBlock = nil
    @tree.open_elements[afeIndex..-1].each do |element|
      if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(element.name)
        furthestBlock = element
        break
      end
    end

    # Step 3: without a furthest block, pop up to and including the
    # formatting element and remove it from the active list.
    if furthestBlock.nil?
      element = remove_open_elements_until {|el| el == afeElement }
      @tree.activeFormattingElements.delete(element)
      return
    end
    # The element just below the formatting element on the stack.
    commonAncestor = @tree.open_elements[afeIndex - 1]

    # Step 5
    furthestBlock.parent.removeChild(furthestBlock) if furthestBlock.parent

    # Step 6
    # The bookmark is supposed to help us identify where to reinsert
    # nodes in step 12. We have to ensure that we reinsert nodes after
    # the node before the active formatting element. Note the bookmark
    # can move in step 7.4
    bookmark = @tree.activeFormattingElements.index(afeElement)

    # Step 7
    lastNode = node = furthestBlock
    while true
      # AT replace this with a function and recursion?
      # Node is element before node in open elements
      node = @tree.open_elements[@tree.open_elements.index(node) - 1]
      # Elements that are not active formatting elements are removed
      # from the stack while walking down.
      until @tree.activeFormattingElements.include?(node)
        tmpNode = node
        node = @tree.open_elements[@tree.open_elements.index(node) - 1]
        @tree.open_elements.delete(tmpNode)
      end
      # Step 7.3
      break if node == afeElement
      # Step 7.4
      if lastNode == furthestBlock
        # XXX should this be index(node) or index(node)+1
        # Anne: I think +1 is ok. Given x = [2,3,4,5]
        # x.index(3) gives 1 and then x[1 +1] gives 4...
        bookmark = @tree.activeFormattingElements.index(node) + 1
      end
      # Step 7.5: a node that already has content is replaced by a
      # clone in both lists.
      if node.hasContent
        clone = node.cloneNode
        # Replace node with clone
        @tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = clone
        @tree.open_elements[@tree.open_elements.index(node)] = clone
        node = clone
      end
      # Step 7.6
      # Remove lastNode from its parents, if any
      lastNode.parent.removeChild(lastNode) if lastNode.parent
      node.appendChild(lastNode)
      # Step 7.7
      lastNode = node
      # End of inner loop
    end

    # Step 8
    lastNode.parent.removeChild(lastNode) if lastNode.parent
    commonAncestor.appendChild(lastNode)

    # Step 9
    clone = afeElement.cloneNode

    # Step 10
    furthestBlock.reparentChildren(clone)

    # Step 11
    furthestBlock.appendChild(clone)

    # Step 12: swap the formatting element for its clone in the active
    # list, at the bookmark (clamped to the end of the list).
    @tree.activeFormattingElements.delete(afeElement)
    @tree.activeFormattingElements.insert([bookmark,@tree.activeFormattingElements.length].min, clone)

    # Step 13: likewise on the stack, directly after the furthest block.
    @tree.open_elements.delete(afeElement)
    @tree.open_elements.insert(@tree.open_elements.index(furthestBlock) + 1, clone)
  end
end
557
+
558
# </applet>, </button>, </marquee>, </object>: like the block case, and
# additionally clears the active formatting elements once the element
# has been popped.
def endTagAppletButtonMarqueeObject(name)
  if in_scope?(name)
    @tree.generateImpliedEndTags
  end

  if @tree.open_elements.last.name != name
    parse_error("end-tag-too-early", {"name" => name})
  end

  if in_scope?(name)
    remove_open_elements_until(name)
    @tree.clearActiveFormattingElements
  end
end
571
+
572
# End tags of elements that are handled in other insertion modes are
# only reported as parse errors here.
def endTagMisplaced(name)
  details = {"name" => name}
  parse_error("unexpected-end-tag", details)
end
576
+
577
# </br> is a parse error and is treated as a <br> element: an
# attribute-less element is inserted and immediately popped.
def endTagBr(name)
  renamed = {"originalName" => "br", "newName" => "br element"}
  parse_error("unexpected-end-tag-treated-as", renamed)
  @tree.reconstructActiveFormattingElements
  @tree.insert_element(name, {})
  @tree.open_elements.pop
end
584
+
585
# End tags of elements that never have an end tag are only reported as
# parse errors.
def endTagNone(name)
  details = {"name" => name}
  parse_error("no-end-tag", details)
end
589
+
590
# End tag of a CDATA/RCDATA element (noframes, noscript, noembed,
# textarea, xmp, iframe): pops the current node when its name matches,
# otherwise reports a parse error.
def endTagCdataTextAreaXmp(name)
  stack = @tree.open_elements
  if stack.last.name != name
    parse_error("unexpected-end-tag", {"name" => name})
  else
    stack.pop
  end
end
597
+
598
# New HTML5 elements ("event-source", "section", "nav", "article",
# "aside", "header", "footer", "datagrid", "command") have no special
# handling yet and are closed like any other element.
def endTagNew(name)
  # STDERR.puts "Warning: Undefined behaviour for end tag #{name}"
  # raise NotImplementedError
  endTagOther(name)
end
605
+
606
# Fallback for end tags without a dedicated handler. Walks the stack of
# open elements from the top: at the first element with a matching
# name, implied end tags are generated, a parse error is reported if
# that element is not the current node, and the stack is popped up to
# it. Reaching a special or scoping element first is a parse error and
# the tag is ignored.
#
# XXX This logic should be moved into the treebuilder
def endTagOther(name)
  @tree.open_elements.reverse.each do |candidate|
    if candidate.name == name
      @tree.generateImpliedEndTags

      if @tree.open_elements.last.name != name
        parse_error("unexpected-end-tag", {"name" => name})
      end

      remove_open_elements_until { |open| open == candidate }

      break
    elsif (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(candidate.name)
      parse_error("unexpected-end-tag", {"name" => name})
      break
    end
  end
end
627
+
628
+ protected
629
+
630
# Inserts the element and appends the resulting current node to the
# list of active formatting elements.
def addFormattingElement(name, attributes)
  tree = @tree
  tree.insert_element(name, attributes)
  tree.activeFormattingElements.push(tree.open_elements.last)
end
634
+
635
+ end
636
+ end