spk-html5 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. data/History.txt +10 -0
  2. data/Manifest.txt +73 -0
  3. data/README +45 -0
  4. data/Rakefile.rb +33 -0
  5. data/bin/html5 +7 -0
  6. data/lib/html5.rb +13 -0
  7. data/lib/html5/cli.rb +248 -0
  8. data/lib/html5/constants.rb +1061 -0
  9. data/lib/html5/filters/base.rb +10 -0
  10. data/lib/html5/filters/inject_meta_charset.rb +82 -0
  11. data/lib/html5/filters/iso639codes.rb +755 -0
  12. data/lib/html5/filters/optionaltags.rb +198 -0
  13. data/lib/html5/filters/rfc2046.rb +31 -0
  14. data/lib/html5/filters/rfc3987.rb +91 -0
  15. data/lib/html5/filters/sanitizer.rb +15 -0
  16. data/lib/html5/filters/validator.rb +834 -0
  17. data/lib/html5/filters/whitespace.rb +36 -0
  18. data/lib/html5/html5parser.rb +247 -0
  19. data/lib/html5/html5parser/after_after_body_phase.rb +43 -0
  20. data/lib/html5/html5parser/after_after_frameset_phase.rb +32 -0
  21. data/lib/html5/html5parser/after_body_phase.rb +46 -0
  22. data/lib/html5/html5parser/after_frameset_phase.rb +33 -0
  23. data/lib/html5/html5parser/after_head_phase.rb +55 -0
  24. data/lib/html5/html5parser/before_head_phase.rb +44 -0
  25. data/lib/html5/html5parser/before_html_phase.rb +41 -0
  26. data/lib/html5/html5parser/in_body_phase.rb +636 -0
  27. data/lib/html5/html5parser/in_caption_phase.rb +69 -0
  28. data/lib/html5/html5parser/in_cell_phase.rb +78 -0
  29. data/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  30. data/lib/html5/html5parser/in_foreign_content_phase.rb +50 -0
  31. data/lib/html5/html5parser/in_frameset_phase.rb +56 -0
  32. data/lib/html5/html5parser/in_head_phase.rb +143 -0
  33. data/lib/html5/html5parser/in_row_phase.rb +96 -0
  34. data/lib/html5/html5parser/in_select_phase.rb +90 -0
  35. data/lib/html5/html5parser/in_select_table_phase.rb +35 -0
  36. data/lib/html5/html5parser/in_table_body_phase.rb +92 -0
  37. data/lib/html5/html5parser/in_table_phase.rb +177 -0
  38. data/lib/html5/html5parser/initial_phase.rb +133 -0
  39. data/lib/html5/html5parser/phase.rb +171 -0
  40. data/lib/html5/inputstream.rb +735 -0
  41. data/lib/html5/liberalxmlparser.rb +158 -0
  42. data/lib/html5/sanitizer.rb +209 -0
  43. data/lib/html5/serializer.rb +2 -0
  44. data/lib/html5/serializer/htmlserializer.rb +179 -0
  45. data/lib/html5/serializer/xhtmlserializer.rb +20 -0
  46. data/lib/html5/sniffer.rb +45 -0
  47. data/lib/html5/tokenizer.rb +1059 -0
  48. data/lib/html5/treebuilders.rb +24 -0
  49. data/lib/html5/treebuilders/base.rb +339 -0
  50. data/lib/html5/treebuilders/hpricot.rb +231 -0
  51. data/lib/html5/treebuilders/rexml.rb +215 -0
  52. data/lib/html5/treebuilders/simpletree.rb +191 -0
  53. data/lib/html5/treewalkers.rb +26 -0
  54. data/lib/html5/treewalkers/base.rb +162 -0
  55. data/lib/html5/treewalkers/hpricot.rb +48 -0
  56. data/lib/html5/treewalkers/rexml.rb +48 -0
  57. data/lib/html5/treewalkers/simpletree.rb +48 -0
  58. data/lib/html5/version.rb +3 -0
  59. data/test/preamble.rb +69 -0
  60. data/test/test_cli.rb +16 -0
  61. data/test/test_encoding.rb +35 -0
  62. data/test/test_input_stream.rb +26 -0
  63. data/test/test_lxp.rb +283 -0
  64. data/test/test_parser.rb +63 -0
  65. data/test/test_sanitizer.rb +173 -0
  66. data/test/test_serializer.rb +67 -0
  67. data/test/test_sniffer.rb +27 -0
  68. data/test/test_stream.rb +71 -0
  69. data/test/test_tokenizer.rb +95 -0
  70. data/test/test_treewalkers.rb +135 -0
  71. data/test/test_validator.rb +31 -0
  72. data/test/tokenizer_test_parser.rb +67 -0
  73. data/test19.rb +38 -0
  74. metadata +198 -0
@@ -0,0 +1,48 @@
1
+ require 'html5/treewalkers/base'
2
+ require 'rexml/document'
3
+
4
module HTML5
  module TreeWalkers
    module Hpricot
      # Tree walker over Hpricot nodes. It supplies the node-inspection and
      # navigation hooks (node_details, first_child, next_sibling, parent)
      # on top of NonRecursiveTreeWalker.
      #
      # NOTE(review): the file this class lives in requires 'rexml/document'
      # rather than 'hpricot' -- presumably Hpricot is loaded elsewhere; confirm.
      class TreeWalker < HTML5::TreeWalkers::NonRecursiveTreeWalker
        # Describes +node+ as an array whose first entry is the node kind
        # (:DOCUMENT, :DOCTYPE, :ELEMENT, :TEXT, :COMMENT, :DOCUMENT_FRAGMENT,
        # :UNKNOWN, or nil for an XML declaration), followed by kind-specific
        # details.
        def node_details(node)
          case node
          when ::Hpricot::Elem
            # An element with an empty name is reported as a document fragment.
            return [:DOCUMENT_FRAGMENT] if node.name.empty?

            attribute_pairs = node.attributes.map { |name, value| [name, value] }
            has_children = !node.empty?
            [:ELEMENT, node.name, attribute_pairs, has_children]
          when ::Hpricot::Text    then [:TEXT, node.content]
          when ::Hpricot::Comment then [:COMMENT, node.content]
          when ::Hpricot::Doc     then [:DOCUMENT]
          when ::Hpricot::DocType then [:DOCTYPE, node.target, node.public_id, node.system_id]
          when ::Hpricot::XMLDecl then [nil]
          else
            [:UNKNOWN, node.class.inspect]
          end
        end

        # First entry of +node+'s children collection.
        def first_child(node)
          kids = node.children
          kids.first
        end

        # The node that follows +node+, as reported by Hpricot's next_node.
        def next_sibling(node)
          node.next_node
        end

        # The node that contains +node+.
        def parent(node)
          node.parent
        end
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ require 'html5/treewalkers/base'
2
+ require 'rexml/document'
3
+
4
module HTML5
  module TreeWalkers
    module REXML
      # Tree walker over REXML nodes. It supplies the node-inspection and
      # navigation hooks (node_details, first_child, next_sibling, parent)
      # on top of NonRecursiveTreeWalker.
      class TreeWalker < HTML5::TreeWalkers::NonRecursiveTreeWalker
        # Describes +node+ as an array whose first entry is the node kind
        # (:DOCUMENT, :DOCTYPE, :ELEMENT, :TEXT, :COMMENT, :DOCUMENT_FRAGMENT,
        # :UNKNOWN, or nil for an XML declaration), followed by kind-specific
        # details.
        def node_details(node)
          case node
          when ::REXML::Document
            [:DOCUMENT]
          when ::REXML::Element
            if node.name
              attribute_pairs = node.attributes.map { |name, value| [name, value] }
              has_children = node.has_elements? || node.has_text?
              [:ELEMENT, node.name, attribute_pairs, has_children]
            else
              # An element without a name is reported as a document fragment.
              [:DOCUMENT_FRAGMENT]
            end
          when ::REXML::Text    then [:TEXT, node.value]
          when ::REXML::Comment then [:COMMENT, node.string]
          when ::REXML::DocType then [:DOCTYPE, node.name, node.public, node.system]
          when ::REXML::XMLDecl then [nil]
          else
            [:UNKNOWN, node.class.inspect]
          end
        end

        # First entry of +node+'s children collection.
        def first_child(node)
          kids = node.children
          kids.first
        end

        # The node that follows +node+ under the same parent.
        def next_sibling(node)
          node.next_sibling
        end

        # The node that contains +node+.
        def parent(node)
          node.parent
        end
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ require 'html5/treewalkers/base'
2
+
3
module HTML5
  module TreeWalkers
    module SimpleTree
      # Recursive tree walker over the SimpleTree node classes. Including
      # HTML5::TreeBuilders::SimpleTree brings the node classes (Document,
      # Element, TextNode, ...) into scope for the +case+ below.
      class TreeWalker < HTML5::TreeWalkers::Base
        include HTML5::TreeBuilders::SimpleTree

        # Yields the token(s) describing +node+ and, for non-void elements,
        # all of its descendants in document order.
        #
        # Document and DocumentFragment nodes produce no tokens here; their
        # children are visited by #each.
        def walk(node)
          case node
          when Document, DocumentFragment
            return

          when DocumentType
            yield doctype(node.name, node.public_id, node.system_id)

          when TextNode
            text(node.value) { |token| yield token }

          when Element
            if VOID_ELEMENTS.include?(node.name)
              yield empty_tag(node.name, node.attributes, node.hasContent())
            else
              yield start_tag(node.name, node.attributes)
              node.childNodes.each do |child|
                walk(child) { |token| yield token }
              end
              yield end_tag(node.name)
            end

          when CommentNode
            yield comment(node.value)

          else
            # Unknown node classes are reported as a token only; the stray
            # debugging print to stdout that used to precede this was removed.
            yield unknown(node.class)
          end
        end

        # Yields every token for the children of @tree, in order.
        def each
          @tree.childNodes.each do |child|
            walk(child) { |token| yield token }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module HTML5
  # Version string of this release; frozen so it cannot be mutated in place.
  VERSION = '0.10.1'.freeze
end
@@ -0,0 +1,69 @@
1
+ require 'test/unit'
2
+
3
# Directory three levels above this file.
HTML5_BASE = File.dirname(File.dirname(File.dirname(File.expand_path(__FILE__))))

# Test data lives in <base>/ruby/testdata when that path exists, otherwise in
# <base>/testdata. File.exist? is used because File.exists? was removed in
# Ruby 3.2.
ruby_testdata = File.join(HTML5_BASE, 'ruby', 'testdata')
TESTDATA_DIR = File.exist?(ruby_testdata) ? ruby_testdata : File.join(HTML5_BASE, 'testdata')

# Put the sibling lib directory and this file's own directory on the load path.
$:.unshift File.join(File.dirname(File.dirname(__FILE__)), 'lib')
$:.unshift File.dirname(__FILE__)
13
+
14
# Returns the paths of all entries directly inside TESTDATA_DIR/<subdirectory>
# whose names match the glob '*.*'.
def html5_test_files(subdirectory)
  pattern = File.join(TESTDATA_DIR, subdirectory, '*.*')
  Dir.glob(pattern)
end
17
+
18
+ require 'rubygems'
19
+ require 'json'
20
+
21
module HTML5
  # Helpers shared by the test cases: tree-dump normalisation and a reader
  # for the sectioned test-data files.
  module TestSupport
    # convert the output of str(document) to the format used in the testcases
    #
    # The first line of +treedump+ is dropped; every remaining line that is
    # longer than two characters and starts with "|" loses its first three
    # characters. An empty dump yields an empty string.
    def convertTreeDump(treedump)
      body_lines = treedump.split(/\n/).drop(1)
      body_lines.map do |line|
        line.length > 2 && line.start_with?('|') ? line[3..-1] : line
      end.join("\n")
    end

    # Sorts each run of consecutive, equally indented "name=..." lines in
    # +output+ alphabetically and returns the resulting string.
    def sortattrs(output)
      output.gsub(/^(\s+)\w+=.*(\n\1\w+=.*)+/) do |match|
        match.split("\n").sort.join("\n")
      end
    end

    # Enumerates the records of a test-data file. A record is made of
    # sections, each introduced by a line "#<name>" where <name> is one of
    # the requested section names; a new record starts whenever the first
    # requested section name is seen again.
    class TestData
      include Enumerable

      # filename:: path of the test-data file
      # sections:: section names to extract, in the order they are yielded
      def initialize(filename, sections)
        @filename = filename
        @sections = sections
      end

      # Yields one array per record holding the text of each requested
      # section (nil for a section the record lacks). The file is opened for
      # the duration of the iteration and closed afterwards, so the data can
      # be enumerated more than once. An empty file yields nothing.
      def each
        data = {}
        key = nil
        File.open(@filename) do |file|
          file.each_line do |line|
            # chomp (rather than slicing off the last character) keeps a
            # heading intact when it is the final line without a newline.
            heading = line.start_with?('#') ? line.chomp[1..-1] : nil
            if heading && @sections.include?(heading)
              if !data.empty? && heading == @sections[0]
                data[key].chomp! #Remove trailing newline
                yield normaliseOutput(data)
                data = {}
              end
              key = heading
              data[key] = ''.dup
            elsif key
              data[key] << line
            end
          end
        end
        yield normaliseOutput(data) unless data.empty?
      end

      # Strips one trailing newline from every collected section (in place)
      # and returns the section texts in the order of the requested names.
      def normaliseOutput(data)
        #Remove trailing newlines
        data.each_value { |text| text.chomp! }
        @sections.map { |heading| data[heading] }
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), 'preamble'))
2
+ require "html5/cli"
3
+
4
# Tests for the HTML5::CLI helper methods.
class TestCli < Test::Unit::TestCase
  # open_input('-') must hand back $stdin; the other two sources are checked
  # only for the class of object returned.
  # NOTE(review): the URL case presumably performs a real network request and
  # the file path is relative to the working directory -- confirm.
  def test_open_input
    assert_equal $stdin, HTML5::CLI.open_input('-')

    [
      [StringIO, 'http://whatwg.org/'],
      [File, 'testdata/sites/google-results.htm']
    ].each do |expected_class, source|
      assert_kind_of expected_class, HTML5::CLI.open_input(source)
    end
  end

  # Both the short and the long spelling of the option set +treebuilder+.
  def test_parse_opts
    HTML5::CLI.parse_opts [] # TODO test defaults

    %w(-b --treebuilder).each do |flag|
      options = HTML5::CLI.parse_opts([flag, 'hpricot'])
      assert_equal 'hpricot', options.treebuilder
    end
  end
end
@@ -0,0 +1,35 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), 'preamble'))
2
+
3
+ require 'html5/inputstream'
4
+
5
# Encoding-detection tests for HTML5::HTMLInputStream. One test method is
# generated per record of every file returned by html5_test_files('encoding').
class Html5EncodingTestCase < Test::Unit::TestCase
  include HTML5
  include TestSupport

  # The chardet test is defined only when UniversalDetector can be loaded.
  # The rescue belongs to this begin block (not to the method body), so a
  # missing library skips the test instead of aborting the whole file.
  begin
    require 'rubygems'
    require 'UniversalDetector'

    def test_chardet #TODO: can we get rid of this?
      path = File.join(TESTDATA_DIR, 'encoding', 'chardet', 'test_big5.txt')
      # Block form so the file handle is closed once the assertion has run.
      File.open(path, 'r') do |file|
        stream = HTML5::HTMLInputStream.new(file, :chardet => true)
        assert_equal 'big5', stream.char_encoding.downcase
      end
    end
  rescue LoadError
    puts "chardet not found, skipping chardet tests"
  end

  html5_test_files('encoding').each do |test_file|
    # Test name: the file's base name with '.dat' and any dashes removed.
    test_name = File.basename(test_file).sub('.dat', '').tr('-', '')

    TestData.new(test_file, %w(data encoding)).
      each_with_index do |(input, encoding), index|

      define_method 'test_%s_%d' % [ test_name, index + 1 ] do
        stream = HTML5::HTMLInputStream.new(input, :chardet => false)
        assert_equal encoding.downcase, stream.char_encoding.downcase, input
      end
    end
  end

end
@@ -0,0 +1,26 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), 'preamble'))
2
+ require "test/unit"
3
+ require "html5/inputstream"
4
+
5
# Tests for HTML5::HTMLInputStream character reading and position tracking.
class TestHtml5Inputstream < Test::Unit::TestCase
  # Reading the leading newline and pushing it back must leave the stream
  # reporting position [1, 0].
  def test_newline_in_queue
    input = HTML5::HTMLInputStream.new("\nfoo")
    first_char = input.char
    input.unget(first_char)
    assert_equal [1, 0], input.position
  end

  # After 1022 characters of the repeated alphabet have been consumed, the
  # next one is "i" (1022 % 26 == 8).
  # NOTE(review): presumably this straddles an internal buffer edge -- confirm.
  def test_buffer_boundary
    repeated_alphabet = "abcdefghijklmnopqrstuvwxyz" * 50
    input = HTML5::HTMLInputStream.new(repeated_alphabet, :encoding => 'windows-1252')
    1022.times { input.char }
    assert_equal "i", input.char
  end

  # chars_until returns everything before the stop character, or the whole
  # input when the stop character never occurs.
  def test_chars_until
    [
      ["b", "aaaaaaa"],
      ["c", "aaaaaaab"]
    ].each do |stop_char, expected|
      input = HTML5::HTMLInputStream.new("aaaaaaab")
      assert_equal expected, input.chars_until(stop_char)
    end
  end
end
@@ -0,0 +1,283 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), 'preamble'))
2
+
3
+ require 'html5/liberalxmlparser'
4
+
5
# Matches a tag that carries at least one double-quoted attribute. The groups
# are: element name (with trailing whitespace), attribute list, optional "/".
XMLELEM = /<(\w+\s*)((?:[-:\w]+="[^"]*"\s*)+)(\/?)>/

# Parses +input+ with +parser+ (element and attribute names are not
# lowercased) and asserts on the serialised root, with single quotes turned
# into double quotes.
#
# With an explicit +expected+ the serialisation must equal it exactly.
# Without one, the expectation is derived from +input+ itself: decimal
# character references are decoded, and attributes inside each tag are sorted
# on both sides before comparing.
def assert_xml_equal(input, expected=nil, parser=HTML5::XMLParser)
  with_sorted_attributes = lambda do |markup|
    markup.gsub(XMLELEM) do
      tag = Regexp.last_match
      "<#{tag[1]}#{tag[2].split.sort.join(' ')}#{tag[3]}>"
    end
  end

  document = parser.parse(input.chomp, :lowercase_attr_name => false, :lowercase_element_name => false).root

  if expected
    assert_equal(expected, document.to_s.gsub(/'/,'"'))
  else
    expected = with_sorted_attributes.call(input.chomp)
    expected =
      if expected.respond_to? :force_encoding
        expected.gsub(/&#(\d+);/) { Regexp.last_match[1].to_i.chr('utf-8') }
      else
        expected.gsub(/&#(\d+);/) { [Regexp.last_match[1].to_i].pack('U') }
      end
    output = with_sorted_attributes.call(document.to_s.gsub(/'/,'"'))
    assert_equal(expected, output)
  end
end
23
+
24
# Same check as assert_xml_equal, but the parser defaults to
# HTML5::XHTMLParser.
def assert_xhtml_equal(input, expected=nil, parser=HTML5::XHTMLParser)
  forwarded = [input, expected, parser]
  assert_xml_equal(*forwarded)
end
27
+
28
# XHTML parsing of small fragments: the expected output wraps the content in
# html/head/body elements.
class BasicXhtml5Test < Test::Unit::TestCase

  # Mis-nested </b></i> in the input is expected to come out properly nested.
  def test_title_body_mismatched_close
    markup = '<title>Xhtml</title><b><i>content</b></i>'
    expected = [
      '<html xmlns="http://www.w3.org/1999/xhtml">',
      '<head><title>Xhtml</title></head>',
      '<body><b><i>content</i></b></body>',
      '</html>'
    ].join
    assert_xhtml_equal(markup, expected)
  end

  # "&ntilde" without a terminating semicolon is expected to become U+00F1.
  def test_title_body_named_charref
    markup = '<title>ntilde</title>A &ntilde B'
    n_tilde = [0xF1].pack('U')
    expected = [
      '<html xmlns="http://www.w3.org/1999/xhtml">',
      '<head><title>ntilde</title></head>',
      '<body>A ', n_tilde, ' B</body>',
      '</html>'
    ].join
    assert_xhtml_equal(markup, expected)
  end
end
48
+
49
# XML parsing of minimal documents.
class BasicXmlTest < Test::Unit::TestCase

  # A comment must survive the round trip unchanged.
  def test_comment
    markup = "<x><!-- foo --></x>"
    assert_xml_equal(markup)
  end

  # A CDATA section is expected to serialise as plain text.
  def test_cdata
    markup = "<x><![CDATA[foo]]></x>"
    assert_xml_equal(markup, "<x>foo</x>")
  end

  # Plain element content comes back as-is.
  def test_simple_text
    markup = "<p>foo</p>"
    assert_xml_equal(markup, "<p>foo</p>")
  end

  # A missing end tag is expected to be supplied.
  def test_optional_close
    markup = "<p>foo"
    assert_xml_equal(markup, "<p>foo</p>")
  end

  # Mis-nested end tags are expected to come out properly nested.
  def test_html_mismatched
    markup = "<b><i>foo</b></i>"
    assert_xml_equal(markup, "<b><i>foo</i></b>")
  end
end
71
+
72
# XML parsing of OPML snippets, which use mixed-case names.
class OpmlTest < Test::Unit::TestCase

  # A mixed-case element name must survive the round trip.
  def test_mixedCaseElement
    markup = [
      '<opml version="1.0">',
      '<head><ownerName>Dave Winer</ownerName></head>',
      '</opml>'
    ].join
    assert_xml_equal(markup)
  end

  # A mixed-case attribute name must survive the round trip.
  def test_mixedCaseAttribute
    markup = [
      '<opml version="1.0">',
      '<body><outline isComment="true"/></body>',
      '</opml>'
    ].join
    assert_xml_equal(markup)
  end

  # A bare "&" inside an attribute value is expected to come out as "&amp;".
  def test_malformed
    markup = [
      '<opml version="1.0">',
      '<body><outline text="Odds & Ends"/></body>',
      '</opml>'
    ].join
    expected = [
      '<opml version="1.0">',
      '<body><outline text="Odds &amp; Ends"/></body>',
      '</opml>'
    ].join
    assert_xml_equal(markup, expected)
  end
end
98
+
99
# XHTML parsing of whole documents. Tests that pass only the markup rely on
# assert_xhtml_equal deriving the expectation from the input itself, i.e.
# they are round-trip checks; the others spell out the expected output.
class XhtmlTest < Test::Unit::TestCase

  # Embedded MathML, including decimal character references.
  def test_mathml
    markup = <<MATHML
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>MathML</title></head>
<body>
<math xmlns="http://www.w3.org/1998/Math/MathML">
<mrow>
<mi>x</mi>
<mo>=</mo>

<mfrac>
<mrow>
<mrow>
<mo>-</mo>
<mi>b</mi>
</mrow>
<mo>&#177;</mo>
<msqrt>

<mrow>
<msup>
<mi>b</mi>
<mn>2</mn>
</msup>
<mo>-</mo>
<mrow>

<mn>4</mn>
<mo>&#8290;</mo>
<mi>a</mi>
<mo>&#8290;</mo>
<mi>c</mi>
</mrow>
</mrow>

</msqrt>
</mrow>
<mrow>
<mn>2</mn>
<mo>&#8290;</mo>
<mi>a</mi>
</mrow>
</mfrac>

</mrow>
</math>
</body></html>
MATHML
    assert_xhtml_equal markup
  end

  # Embedded SVG, including an attribute value that spans two lines.
  def test_svg
    markup = <<SVG
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>SVG</title></head>
<body>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
<path d="M38,38c0-12,24-15,23-2c0,9-16,13-16,23v7h11v-4c0-9,17-12,17-27
c-2-22-45-22-45,3zM45,70h11v11h-11z" fill="#371">
</path>
<circle cx="50" cy="50" r="45" fill="none" stroke="#371" stroke-width="10">
</circle>

</svg>
</body></html>
SVG
    assert_xhtml_equal markup
  end

  # SVG with attributes in a prefixed (xlink) namespace.
  def test_xlink
    markup = <<XLINK
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>XLINK</title></head>
<body>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
<defs xmlns:l="http://www.w3.org/1999/xlink">
<radialGradient id="s1" fx=".4" fy=".2" r=".7">
<stop stop-color="#FE8"/>
<stop stop-color="#D70" offset="1"/>
</radialGradient>
<radialGradient id="s2" fx=".8" fy=".5" l:href="#s1"/>
<radialGradient id="s3" fx=".5" fy=".9" l:href="#s1"/>
<radialGradient id="s4" fx=".1" fy=".5" l:href="#s1"/>
</defs>
<g stroke="#940">
<path d="M73,29c-37-40-62-24-52,4l6-7c-8-16,7-26,42,9z" fill="url(#s1)"/>
<path d="M47,8c33-16,48,21,9,47l-6-5c38-27,20-44,5-37z" fill="url(#s2)"/>
<path d="M77,32c22,30,10,57-39,51l-1-8c3,3,67,5,36-36z" fill="url(#s3)"/>

<path d="M58,84c-4,20-38-4-8-24l-6-5c-36,43,15,56,23,27z" fill="url(#s4)"/>
<path d="M40,14c-40,37-37,52-9,68l1-8c-16-13-29-21,16-56z" fill="url(#s1)"/>
<path d="M31,33c19,23,20,7,35,41l-9,1.7c-4-19-8-14-31-37z" fill="url(#s2)"/>
</g>
</svg>
</body></html>
XLINK
    assert_xhtml_equal markup
  end

  # A self-closed <br/> must stay self-closed.
  def test_br
    markup = <<BR
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>BR</title></head>
<body>
<br/>
</body></html>
BR
    assert_xhtml_equal markup
  end

  # An empty <strong></strong> pair must keep its explicit end tag.
  def test_strong
    markup = <<STRONG
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>STRONG</title></head>
<body>
<strong></strong>
</body></html>
STRONG
    assert_xhtml_equal markup
  end

  # Escaped characters inside <script> must survive the round trip.
  def test_script
    markup = <<SCRIPT
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>SCRIPT</title></head>
<body>
<script>1 &lt; 2 &amp; 3</script>
</body></html>
SCRIPT
    assert_xhtml_equal markup
  end

  # A self-closed <script src=.../> is expected to come out with an explicit
  # end tag.
  def test_script_src
    markup = <<INPUT
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>SCRIPT</title><script src="http://example.com"/></head>
<body>
<script>1 &lt; 2 &amp; 3</script>
</body></html>
INPUT
    expected = <<EXPECTED.strip
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>SCRIPT</title><script src="http://example.com"></script></head>
<body>
<script>1 &lt; 2 &amp; 3</script>
</body></html>
EXPECTED
    assert_xhtml_equal markup, expected
  end

  # Escaped characters inside <title> must survive the round trip.
  def test_title
    markup = <<TITLE
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>1 &lt; 2 &amp; 3</title></head>
<body>
</body></html>
TITLE
    assert_xhtml_equal markup
  end

  # The XML declaration is expected to be absent from the serialised root.
  def test_prolog
    markup = <<INPUT
<?xml version="1.0" encoding="UTF-8" ?>
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>PROLOG</title></head>
<body>
</body></html>
INPUT
    expected = <<EXPECTED.strip
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>PROLOG</title></head>
<body>
</body></html>
EXPECTED
    assert_xhtml_equal markup, expected
  end

  # Mis-nested tags: the expected output re-opens <u/> inside each element
  # that was opened after it and closes the unclosed <p> and <blockquote>.
  def test_tagsoup
    markup = <<INPUT
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>TAGSOUP</title></head>
<body>
<u><blockquote><p></u>
</body></html>
INPUT
    expected = <<EXPECTED.strip
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>TAGSOUP</title></head>
<body>
<u/><blockquote><u/><p><u/>
</p></blockquote></body></html>
EXPECTED
    assert_xhtml_equal markup, expected
  end

end