spk-html5 0.10.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74) hide show
  1. data/History.txt +10 -0
  2. data/Manifest.txt +73 -0
  3. data/README +45 -0
  4. data/Rakefile.rb +33 -0
  5. data/bin/html5 +7 -0
  6. data/lib/html5.rb +13 -0
  7. data/lib/html5/cli.rb +248 -0
  8. data/lib/html5/constants.rb +1061 -0
  9. data/lib/html5/filters/base.rb +10 -0
  10. data/lib/html5/filters/inject_meta_charset.rb +82 -0
  11. data/lib/html5/filters/iso639codes.rb +755 -0
  12. data/lib/html5/filters/optionaltags.rb +198 -0
  13. data/lib/html5/filters/rfc2046.rb +31 -0
  14. data/lib/html5/filters/rfc3987.rb +91 -0
  15. data/lib/html5/filters/sanitizer.rb +15 -0
  16. data/lib/html5/filters/validator.rb +834 -0
  17. data/lib/html5/filters/whitespace.rb +36 -0
  18. data/lib/html5/html5parser.rb +247 -0
  19. data/lib/html5/html5parser/after_after_body_phase.rb +43 -0
  20. data/lib/html5/html5parser/after_after_frameset_phase.rb +32 -0
  21. data/lib/html5/html5parser/after_body_phase.rb +46 -0
  22. data/lib/html5/html5parser/after_frameset_phase.rb +33 -0
  23. data/lib/html5/html5parser/after_head_phase.rb +55 -0
  24. data/lib/html5/html5parser/before_head_phase.rb +44 -0
  25. data/lib/html5/html5parser/before_html_phase.rb +41 -0
  26. data/lib/html5/html5parser/in_body_phase.rb +636 -0
  27. data/lib/html5/html5parser/in_caption_phase.rb +69 -0
  28. data/lib/html5/html5parser/in_cell_phase.rb +78 -0
  29. data/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  30. data/lib/html5/html5parser/in_foreign_content_phase.rb +50 -0
  31. data/lib/html5/html5parser/in_frameset_phase.rb +56 -0
  32. data/lib/html5/html5parser/in_head_phase.rb +143 -0
  33. data/lib/html5/html5parser/in_row_phase.rb +96 -0
  34. data/lib/html5/html5parser/in_select_phase.rb +90 -0
  35. data/lib/html5/html5parser/in_select_table_phase.rb +35 -0
  36. data/lib/html5/html5parser/in_table_body_phase.rb +92 -0
  37. data/lib/html5/html5parser/in_table_phase.rb +177 -0
  38. data/lib/html5/html5parser/initial_phase.rb +133 -0
  39. data/lib/html5/html5parser/phase.rb +171 -0
  40. data/lib/html5/inputstream.rb +735 -0
  41. data/lib/html5/liberalxmlparser.rb +158 -0
  42. data/lib/html5/sanitizer.rb +209 -0
  43. data/lib/html5/serializer.rb +2 -0
  44. data/lib/html5/serializer/htmlserializer.rb +179 -0
  45. data/lib/html5/serializer/xhtmlserializer.rb +20 -0
  46. data/lib/html5/sniffer.rb +45 -0
  47. data/lib/html5/tokenizer.rb +1059 -0
  48. data/lib/html5/treebuilders.rb +24 -0
  49. data/lib/html5/treebuilders/base.rb +339 -0
  50. data/lib/html5/treebuilders/hpricot.rb +231 -0
  51. data/lib/html5/treebuilders/rexml.rb +215 -0
  52. data/lib/html5/treebuilders/simpletree.rb +191 -0
  53. data/lib/html5/treewalkers.rb +26 -0
  54. data/lib/html5/treewalkers/base.rb +162 -0
  55. data/lib/html5/treewalkers/hpricot.rb +48 -0
  56. data/lib/html5/treewalkers/rexml.rb +48 -0
  57. data/lib/html5/treewalkers/simpletree.rb +48 -0
  58. data/lib/html5/version.rb +3 -0
  59. data/test/preamble.rb +69 -0
  60. data/test/test_cli.rb +16 -0
  61. data/test/test_encoding.rb +35 -0
  62. data/test/test_input_stream.rb +26 -0
  63. data/test/test_lxp.rb +283 -0
  64. data/test/test_parser.rb +63 -0
  65. data/test/test_sanitizer.rb +173 -0
  66. data/test/test_serializer.rb +67 -0
  67. data/test/test_sniffer.rb +27 -0
  68. data/test/test_stream.rb +71 -0
  69. data/test/test_tokenizer.rb +95 -0
  70. data/test/test_treewalkers.rb +135 -0
  71. data/test/test_validator.rb +31 -0
  72. data/test/tokenizer_test_parser.rb +67 -0
  73. data/test19.rb +38 -0
  74. metadata +198 -0
@@ -0,0 +1,48 @@
1
+ require 'html5/treewalkers/base'
2
+ require 'rexml/document'
3
+
4
module HTML5
  module TreeWalkers
    module Hpricot
      # Adapter that lets the non-recursive tree walker traverse an Hpricot
      # tree: it classifies each node and exposes the child / sibling / parent
      # links the traversal needs.
      #
      # NOTE(review): the file this class lives in requires 'rexml/document'
      # but not the hpricot library itself; presumably ::Hpricot is loaded
      # elsewhere before this walker is used -- confirm.
      class TreeWalker < HTML5::TreeWalkers::NonRecursiveTreeWalker

        # Describes +node+ as an array whose first entry is a node-type symbol
        # (or nil for an XML declaration) followed by type-specific details.
        def node_details(node)
          case node
          when ::Hpricot::Elem
            tag = node.name
            # An element with an empty name is reported as a document fragment.
            return [:DOCUMENT_FRAGMENT] if tag.empty?

            attribute_pairs = node.attributes.map { |name, value| [name, value] }
            has_children = !node.empty?
            [:ELEMENT, tag, attribute_pairs, has_children]
          when ::Hpricot::Text    then [:TEXT, node.content]
          when ::Hpricot::Comment then [:COMMENT, node.content]
          when ::Hpricot::Doc     then [:DOCUMENT]
          when ::Hpricot::DocType
            [:DOCTYPE, node.target, node.public_id, node.system_id]
          when ::Hpricot::XMLDecl then [nil]
          else
            [:UNKNOWN, node.class.inspect]
          end
        end

        # First child of +node+ (nil when the child list is empty).
        def first_child(node)
          node.children.first
        end

        # Node that follows +node+ under the same parent.
        def next_sibling(node)
          node.next_node
        end

        # Parent of +node+.
        def parent(node)
          node.parent
        end
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ require 'html5/treewalkers/base'
2
+ require 'rexml/document'
3
+
4
module HTML5
  module TreeWalkers
    module REXML
      # Adapter that lets the non-recursive tree walker traverse a REXML tree:
      # it classifies each node and exposes the child / sibling / parent links
      # the traversal needs.
      class TreeWalker < HTML5::TreeWalkers::NonRecursiveTreeWalker

        # Describes +node+ as an array whose first entry is a node-type symbol
        # (or nil for an XML declaration) followed by type-specific details.
        def node_details(node)
          case node
          # Document is tested before Element so that a document is never
          # reported as a plain element.
          when ::REXML::Document then [:DOCUMENT]
          when ::REXML::Element
            tag = node.name
            # An element without a name is reported as a document fragment.
            return [:DOCUMENT_FRAGMENT] unless tag

            attribute_pairs = node.attributes.map { |name, value| [name, value] }
            has_children = node.has_elements? || node.has_text?
            [:ELEMENT, tag, attribute_pairs, has_children]
          when ::REXML::Text    then [:TEXT, node.value]
          when ::REXML::Comment then [:COMMENT, node.string]
          when ::REXML::DocType
            [:DOCTYPE, node.name, node.public, node.system]
          when ::REXML::XMLDecl then [nil]
          else
            [:UNKNOWN, node.class.inspect]
          end
        end

        # First child of +node+ (nil when the child list is empty).
        def first_child(node)
          node.children.first
        end

        # Node that follows +node+ under the same parent.
        def next_sibling(node)
          node.next_sibling
        end

        # Parent of +node+.
        def parent(node)
          node.parent
        end
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ require 'html5/treewalkers/base'
2
+
3
module HTML5
  module TreeWalkers
    module SimpleTree
      # Recursive tree walker for trees built from the SimpleTree node
      # classes (mixed in below so they can be named without a prefix).
      class TreeWalker < HTML5::TreeWalkers::Base
        include HTML5::TreeBuilders::SimpleTree

        # Yields the tokens for +node+ and, for non-void elements, for all of
        # its descendants in document order.
        #
        # Document and DocumentFragment nodes produce no tokens here; their
        # children are visited by #each. A node of an unrecognised class
        # yields a single token built by +unknown+ and nothing is printed.
        def walk(node, &block)
          case node
          when Document, DocumentFragment
            nil

          when DocumentType
            yield doctype(node.name, node.public_id, node.system_id)

          when TextNode
            # +text+ may produce several tokens; pass them straight through.
            text(node.value, &block)

          when Element
            if VOID_ELEMENTS.include?(node.name)
              yield empty_tag(node.name, node.attributes, node.hasContent)
            else
              yield start_tag(node.name, node.attributes)
              node.childNodes.each { |child| walk(child, &block) }
              yield end_tag(node.name)
            end

          when CommentNode
            yield comment(node.value)

          else
            yield unknown(node.class)
          end
        end

        # Yields every token of the tree by walking each child of the root.
        def each(&block)
          @tree.childNodes.each { |child| walk(child, &block) }
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
# Top-level namespace of the library; this file contributes only the version.
module HTML5
  # Version string of this release.
  VERSION = "0.10.1"
end
@@ -0,0 +1,69 @@
1
+ require 'test/unit'
2
+
3
# Base directory: File.dirname applied three times to this file's absolute path.
HTML5_BASE = File.dirname(File.dirname(File.dirname(File.expand_path(__FILE__))))

# Fixtures are read from <base>/ruby/testdata when that path exists and from
# <base>/testdata otherwise. File.exist? is used because the File.exists?
# alias is no longer available on current Ruby versions.
TESTDATA_DIR =
  if File.exist?(File.join(HTML5_BASE, 'ruby', 'testdata'))
    File.join(HTML5_BASE, 'ruby', 'testdata')
  else
    File.join(HTML5_BASE, 'testdata')
  end

# Put the sibling lib directory and this directory at the front of the load
# path; this directory is added last, so it is searched first.
$:.unshift File.join(File.dirname(File.dirname(__FILE__)), 'lib')
$:.unshift File.dirname(__FILE__)
13
+
14
# Returns the paths of all files in TESTDATA_DIR/<subdirectory> whose names
# match the glob '*.*' (i.e. contain a dot). Yields an empty array when
# nothing matches.
def html5_test_files(subdirectory)
  pattern = File.join(TESTDATA_DIR, subdirectory, '*.*')
  Dir.glob(pattern)
end
17
+
18
+ require 'rubygems'
19
+ require 'json'
20
+
21
+ module HTML5
22
+ module TestSupport
23
+ # convert the output of str(document) to the format used in the testcases
24
+ def convertTreeDump(treedump)
25
+ treedump.split(/\n/)[1..-1].map { |line| (line.length > 2 and line[0] == ?|) ? line[3..-1] : line }.join("\n")
26
+ end
27
+
28
+ def sortattrs(output)
29
+ output.gsub(/^(\s+)\w+=.*(\n\1\w+=.*)+/) do |match|
30
+ match.split("\n").sort.join("\n")
31
+ end
32
+ end
33
+
34
+ class TestData
35
+ include Enumerable
36
+
37
+ def initialize(filename, sections)
38
+ @f = open(filename)
39
+ @sections = sections
40
+ end
41
+
42
+ def each
43
+ data = {}
44
+ key = nil
45
+ @f.each_line do |line|
46
+ if line[0] == ?# and @sections.include?(line[1..-2])
47
+ heading = line[1..-2]
48
+ if data.any? and heading == @sections[0]
49
+ data[key].chomp! #Remove trailing newline
50
+ yield normaliseOutput(data)
51
+ data = {}
52
+ end
53
+ key = heading
54
+ data[key]=""
55
+ elsif key
56
+ data[key] += line
57
+ end
58
+ end
59
+ yield normaliseOutput(data) if data
60
+ end
61
+
62
+ def normaliseOutput(data)
63
+ #Remove trailing newlines
64
+ data.keys.each { |key| data[key].chomp! }
65
+ @sections.map {|heading| data[heading]}
66
+ end
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,16 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), 'preamble'))
2
+ require "html5/cli"
3
+
4
# Tests for the command-line front end's input and option handling.
class TestCli < Test::Unit::TestCase
  # '-' must map to standard input, a URL to a StringIO and a file path to a
  # File.
  def test_open_input
    assert_equal($stdin, HTML5::CLI.open_input('-'))
    assert_kind_of(StringIO, HTML5::CLI.open_input('http://whatwg.org/'))
    assert_kind_of(File, HTML5::CLI.open_input('testdata/sites/google-results.htm'))
  end

  # The short and the long form of the tree builder option must both set
  # +treebuilder+.
  def test_parse_opts
    HTML5::CLI.parse_opts([]) # TODO test defaults
    [['-b', 'hpricot'], ['--treebuilder', 'hpricot']].each do |argv|
      assert_equal('hpricot', HTML5::CLI.parse_opts(argv).treebuilder)
    end
  end
end
@@ -0,0 +1,35 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), 'preamble'))
2
+
3
+ require 'html5/inputstream'
4
+
5
# Encoding detection tests: one optional chardet test plus one generated test
# per case found in the 'encoding' test data files.
class Html5EncodingTestCase < Test::Unit::TestCase
  include HTML5
  include TestSupport

  # The chardet test is only defined when the optional UniversalDetector
  # library can be loaded. The rescue belongs to this begin block so that a
  # failing require is reported and skipped instead of aborting the file.
  begin
    require 'rubygems'
    require 'UniversalDetector'

    def test_chardet #TODO: can we get rid of this?
      path = File.join(TESTDATA_DIR, 'encoding', 'chardet', 'test_big5.txt')
      # Block form so the fixture file is closed when the test finishes.
      File.open(path, 'r') do |file|
        stream = HTML5::HTMLInputStream.new(file, :chardet => true)
        assert_equal 'big5', stream.char_encoding.downcase
      end
    end
  rescue LoadError
    puts "chardet not found, skipping chardet tests"
  end

  # Define test_<file>_<n> for the n-th (1-based) case of every data file;
  # the test name uses the file's base name without '.dat' and without '-'.
  html5_test_files('encoding').each do |test_file|
    test_name = File.basename(test_file).sub('.dat', '').tr('-', '')

    TestData.new(test_file, %w(data encoding)).
      each_with_index do |(input, encoding), index|

      define_method 'test_%s_%d' % [ test_name, index + 1 ] do
        stream = HTML5::HTMLInputStream.new(input, :chardet => false)
        assert_equal encoding.downcase, stream.char_encoding.downcase, input
      end
    end
  end

end
@@ -0,0 +1,26 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), 'preamble'))
2
+ require "test/unit"
3
+ require "html5/inputstream"
4
+
5
# Tests for position tracking, buffering and scanning in HTMLInputStream.
class TestHtml5Inputstream < Test::Unit::TestCase
  # Reading a leading newline and pushing it back must leave the reported
  # position at [1, 0].
  def test_newline_in_queue
    stream = HTML5::HTMLInputStream.new("\nfoo")
    newline = stream.char
    stream.unget(newline)
    assert_equal([1, 0], stream.position)
  end

  # 1022 = 39 * 26 + 8, so after 1022 reads the next character is the one at
  # index 8 of the alphabet.
  # NOTE(review): 1022 presumably sits next to an internal buffer size --
  # confirm against the stream implementation.
  def test_buffer_boundary
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    stream = HTML5::HTMLInputStream.new(alphabet * 50, :encoding => 'windows-1252')
    1022.times { stream.char }
    assert_equal("i", stream.char)
  end

  # chars_until returns everything before the stop character, or the whole
  # input when the stop character never occurs.
  def test_chars_until
    with_stop = HTML5::HTMLInputStream.new("aaaaaaab")
    assert_equal("aaaaaaa", with_stop.chars_until("b"))

    without_stop = HTML5::HTMLInputStream.new("aaaaaaab")
    assert_equal("aaaaaaab", without_stop.chars_until("c"))
  end
end
@@ -0,0 +1,283 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), 'preamble'))
2
+
3
+ require 'html5/liberalxmlparser'
4
+
5
# Matches a tag that carries at least one double-quoted attribute. Captures:
# 1 = element name plus trailing whitespace, 2 = the run of attributes,
# 3 = the optional "/" of an empty-element tag.
XMLELEM = /<(\w+\s*)((?:[-:\w]+="[^"]*"\s*)+)(\/?)>/

# Parses +input+ (without its trailing newline) with +parser+, keeping the
# case of element and attribute names, and asserts on the serialised root
# with single quotes turned into double quotes.
#
# With +expected+ given, the serialisation must equal it exactly. Without it,
# the input itself is the expectation: attributes are sorted inside every tag
# on both sides and decimal character references in the input are replaced by
# the characters they denote.
def assert_xml_equal(input, expected=nil, parser=HTML5::XMLParser)
  # Rewrites every attribute-bearing tag with its attributes in sorted order.
  sort_tag_attributes = lambda do |xml|
    xml.gsub(XMLELEM) { "<#{$1+$2.split.sort.join(' ')+$3}>" }
  end

  root = parser.parse(input.chomp, :lowercase_attr_name => false, :lowercase_element_name => false).root

  if expected
    assert_equal(expected, root.to_s.gsub(/'/,'"'))
  else
    wanted = sort_tag_attributes.call(input.chomp)
    wanted =
      if wanted.respond_to? :force_encoding
        wanted.gsub(/&#(\d+);/) {$1.to_i.chr('utf-8')}
      else
        # Strings without encoding support: build the UTF-8 bytes by hand.
        wanted.gsub(/&#(\d+);/) {[$1.to_i].pack('U')}
      end
    actual = sort_tag_attributes.call(root.to_s.gsub(/'/,'"'))
    assert_equal(wanted, actual)
  end
end
23
+
24
# Same check as assert_xml_equal, but the default parser is the XHTML one.
# All three arguments are passed through unchanged.
def assert_xhtml_equal(input, expected=nil, parser=HTML5::XHTMLParser)
  assert_xml_equal(input, expected, parser)
end
27
+
28
# XHTML parser checks on small inputs that lack html/head/body tags.
class BasicXhtml5Test < Test::Unit::TestCase

  # Mis-nested </b></i> must come out properly nested inside a full document.
  def test_title_body_mismatched_close
    source = '<title>Xhtml</title><b><i>content</b></i>'
    wanted = [
      '<html xmlns="http://www.w3.org/1999/xhtml">',
      '<head><title>Xhtml</title></head>',
      '<body><b><i>content</i></b></body>',
      '</html>'
    ].join
    assert_xhtml_equal(source, wanted)
  end

  # "&ntilde" without a semicolon must become the character U+00F1.
  def test_title_body_named_charref
    source = '<title>ntilde</title>A &ntilde B'
    wanted = [
      '<html xmlns="http://www.w3.org/1999/xhtml">',
      '<head><title>ntilde</title></head>',
      '<body>A ' + [0xF1].pack('U') + ' B</body>',
      '</html>'
    ].join
    assert_xhtml_equal(source, wanted)
  end
end
48
+
49
# XML parser checks on minimal documents.
class BasicXmlTest < Test::Unit::TestCase

  # A comment must round-trip unchanged.
  def test_comment
    assert_xml_equal("<x><!-- foo --></x>")
  end

  # A CDATA section must come out as plain text.
  def test_cdata
    source = "<x><![CDATA[foo]]></x>"
    assert_xml_equal(source, "<x>foo</x>")
  end

  # Plain text content must be preserved.
  def test_simple_text
    source = "<p>foo</p>"
    assert_xml_equal(source, "<p>foo</p>")
  end

  # A missing end tag must be supplied.
  def test_optional_close
    source = "<p>foo"
    assert_xml_equal(source, "<p>foo</p>")
  end

  # Mis-nested end tags must come out properly nested.
  def test_html_mismatched
    source = "<b><i>foo</b></i>"
    assert_xml_equal(source, "<b><i>foo</i></b>")
  end
end
71
+
72
# XML parser checks on OPML snippets with mixed-case names.
class OpmlTest < Test::Unit::TestCase

  # A mixed-case element name must round-trip unchanged.
  def test_mixedCaseElement
    source = [
      '<opml version="1.0">',
      '<head><ownerName>Dave Winer</ownerName></head>',
      '</opml>'
    ].join
    assert_xml_equal(source)
  end

  # A mixed-case attribute name must round-trip unchanged.
  def test_mixedCaseAttribute
    source = [
      '<opml version="1.0">',
      '<body><outline isComment="true"/></body>',
      '</opml>'
    ].join
    assert_xml_equal(source)
  end

  # A bare "&" inside an attribute value must be serialised as "&amp;".
  def test_malformed
    source = [
      '<opml version="1.0">',
      '<body><outline text="Odds & Ends"/></body>',
      '</opml>'
    ].join
    wanted = [
      '<opml version="1.0">',
      '<body><outline text="Odds &amp; Ends"/></body>',
      '</opml>'
    ].join
    assert_xml_equal(source, wanted)
  end
end
98
+
99
# XHTML parser checks on complete documents. Tests that pass a single
# document expect it to round-trip; tests that pass two expect the first to
# be serialised as the second (with surrounding whitespace stripped).
class XhtmlTest < Test::Unit::TestCase

  # Embedded MathML, including numeric character references.
  def test_mathml
    assert_xhtml_equal(<<EOX)
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>MathML</title></head>
<body>
<math xmlns="http://www.w3.org/1998/Math/MathML">
<mrow>
<mi>x</mi>
<mo>=</mo>

<mfrac>
<mrow>
<mrow>
<mo>-</mo>
<mi>b</mi>
</mrow>
<mo>&#177;</mo>
<msqrt>

<mrow>
<msup>
<mi>b</mi>
<mn>2</mn>
</msup>
<mo>-</mo>
<mrow>

<mn>4</mn>
<mo>&#8290;</mo>
<mi>a</mi>
<mo>&#8290;</mo>
<mi>c</mi>
</mrow>
</mrow>

</msqrt>
</mrow>
<mrow>
<mn>2</mn>
<mo>&#8290;</mo>
<mi>a</mi>
</mrow>
</mfrac>

</mrow>
</math>
</body></html>
EOX
  end

  # Embedded SVG with an attribute value that spans two lines.
  def test_svg
    assert_xhtml_equal(<<EOX)
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>SVG</title></head>
<body>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
<path d="M38,38c0-12,24-15,23-2c0,9-16,13-16,23v7h11v-4c0-9,17-12,17-27
c-2-22-45-22-45,3zM45,70h11v11h-11z" fill="#371">
</path>
<circle cx="50" cy="50" r="45" fill="none" stroke="#371" stroke-width="10">
</circle>

</svg>
</body></html>
EOX
  end

  # SVG using a prefixed (xlink) attribute.
  def test_xlink
    assert_xhtml_equal(<<EOX)
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>XLINK</title></head>
<body>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
<defs xmlns:l="http://www.w3.org/1999/xlink">
<radialGradient id="s1" fx=".4" fy=".2" r=".7">
<stop stop-color="#FE8"/>
<stop stop-color="#D70" offset="1"/>
</radialGradient>
<radialGradient id="s2" fx=".8" fy=".5" l:href="#s1"/>
<radialGradient id="s3" fx=".5" fy=".9" l:href="#s1"/>
<radialGradient id="s4" fx=".1" fy=".5" l:href="#s1"/>
</defs>
<g stroke="#940">
<path d="M73,29c-37-40-62-24-52,4l6-7c-8-16,7-26,42,9z" fill="url(#s1)"/>
<path d="M47,8c33-16,48,21,9,47l-6-5c38-27,20-44,5-37z" fill="url(#s2)"/>
<path d="M77,32c22,30,10,57-39,51l-1-8c3,3,67,5,36-36z" fill="url(#s3)"/>

<path d="M58,84c-4,20-38-4-8-24l-6-5c-36,43,15,56,23,27z" fill="url(#s4)"/>
<path d="M40,14c-40,37-37,52-9,68l1-8c-16-13-29-21,16-56z" fill="url(#s1)"/>
<path d="M31,33c19,23,20,7,35,41l-9,1.7c-4-19-8-14-31-37z" fill="url(#s2)"/>
</g>
</svg>
</body></html>
EOX
  end

  # An empty-element <br/> must round-trip.
  def test_br
    assert_xhtml_equal(<<EOX1)
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>BR</title></head>
<body>
<br/>
</body></html>
EOX1
  end

  # An empty <strong></strong> must keep its separate end tag.
  def test_strong
    assert_xhtml_equal(<<EOX)
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>STRONG</title></head>
<body>
<strong></strong>
</body></html>
EOX
  end

  # Escaped characters inside <script> must round-trip.
  def test_script
    assert_xhtml_equal(<<EOX)
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>SCRIPT</title></head>
<body>
<script>1 &lt; 2 &amp; 3</script>
</body></html>
EOX
  end

  # A self-closed <script src=.../> must be serialised with an end tag.
  def test_script_src
    assert_xhtml_equal(<<EOX1, <<EOX2.strip)
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>SCRIPT</title><script src="http://example.com"/></head>
<body>
<script>1 &lt; 2 &amp; 3</script>
</body></html>
EOX1
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>SCRIPT</title><script src="http://example.com"></script></head>
<body>
<script>1 &lt; 2 &amp; 3</script>
</body></html>
EOX2
  end

  # Escaped characters inside <title> must round-trip.
  def test_title
    assert_xhtml_equal(<<EOX)
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>1 &lt; 2 &amp; 3</title></head>
<body>
</body></html>
EOX
  end

  # The XML declaration must not appear in the serialised root element.
  def test_prolog
    assert_xhtml_equal(<<EOX1, <<EOX2.strip)
<?xml version="1.0" encoding="UTF-8" ?>
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>PROLOG</title></head>
<body>
</body></html>
EOX1
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>PROLOG</title></head>
<body>
</body></html>
EOX2
  end

  # Mis-nested inline/block tags must be repaired as shown in the second
  # document.
  def test_tagsoup
    assert_xhtml_equal(<<EOX1, <<EOX2.strip)
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>TAGSOUP</title></head>
<body>
<u><blockquote><p></u>
</body></html>
EOX1
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>TAGSOUP</title></head>
<body>
<u/><blockquote><u/><p><u/>
</p></blockquote></body></html>
EOX2
  end

end