html5 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +3 -0
- data/Manifest.txt +58 -0
- data/README +9 -0
- data/Rakefile.rb +17 -0
- data/lib/html5/constants.rb +818 -0
- data/lib/html5/filters/base.rb +10 -0
- data/lib/html5/filters/inject_meta_charset.rb +82 -0
- data/lib/html5/filters/optionaltags.rb +198 -0
- data/lib/html5/filters/sanitizer.rb +15 -0
- data/lib/html5/filters/whitespace.rb +36 -0
- data/lib/html5/html5parser/after_body_phase.rb +46 -0
- data/lib/html5/html5parser/after_frameset_phase.rb +34 -0
- data/lib/html5/html5parser/after_head_phase.rb +50 -0
- data/lib/html5/html5parser/before_head_phase.rb +41 -0
- data/lib/html5/html5parser/in_body_phase.rb +607 -0
- data/lib/html5/html5parser/in_caption_phase.rb +68 -0
- data/lib/html5/html5parser/in_cell_phase.rb +78 -0
- data/lib/html5/html5parser/in_column_group_phase.rb +55 -0
- data/lib/html5/html5parser/in_frameset_phase.rb +57 -0
- data/lib/html5/html5parser/in_head_phase.rb +138 -0
- data/lib/html5/html5parser/in_row_phase.rb +87 -0
- data/lib/html5/html5parser/in_select_phase.rb +84 -0
- data/lib/html5/html5parser/in_table_body_phase.rb +83 -0
- data/lib/html5/html5parser/in_table_phase.rb +110 -0
- data/lib/html5/html5parser/initial_phase.rb +134 -0
- data/lib/html5/html5parser/phase.rb +158 -0
- data/lib/html5/html5parser/root_element_phase.rb +42 -0
- data/lib/html5/html5parser/trailing_end_phase.rb +35 -0
- data/lib/html5/html5parser.rb +248 -0
- data/lib/html5/inputstream.rb +654 -0
- data/lib/html5/liberalxmlparser.rb +158 -0
- data/lib/html5/sanitizer.rb +188 -0
- data/lib/html5/serializer/htmlserializer.rb +180 -0
- data/lib/html5/serializer/xhtmlserializer.rb +20 -0
- data/lib/html5/serializer.rb +2 -0
- data/lib/html5/tokenizer.rb +968 -0
- data/lib/html5/treebuilders/base.rb +334 -0
- data/lib/html5/treebuilders/hpricot.rb +231 -0
- data/lib/html5/treebuilders/rexml.rb +208 -0
- data/lib/html5/treebuilders/simpletree.rb +185 -0
- data/lib/html5/treebuilders.rb +24 -0
- data/lib/html5/treewalkers/base.rb +154 -0
- data/lib/html5/treewalkers/hpricot.rb +48 -0
- data/lib/html5/treewalkers/rexml.rb +48 -0
- data/lib/html5/treewalkers/simpletree.rb +48 -0
- data/lib/html5/treewalkers.rb +26 -0
- data/lib/html5.rb +13 -0
- data/parse.rb +217 -0
- data/tests/preamble.rb +82 -0
- data/tests/test_encoding.rb +35 -0
- data/tests/test_lxp.rb +263 -0
- data/tests/test_parser.rb +68 -0
- data/tests/test_sanitizer.rb +142 -0
- data/tests/test_serializer.rb +68 -0
- data/tests/test_stream.rb +62 -0
- data/tests/test_tokenizer.rb +94 -0
- data/tests/test_treewalkers.rb +116 -0
- data/tests/tokenizer_test_parser.rb +63 -0
- metadata +120 -0
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'html5/treewalkers/base'
|
2
|
+
|
3
|
+
module HTML5
  # Maps the name of a tree implementation to its TreeWalker class.
  module TreeWalkers
    class << self
      # Return the TreeWalker class registered under +name+.
      #
      # +name+ is converted with +to_s+ and lower-cased before it is matched,
      # so symbols and mixed-case strings are accepted. The file that defines
      # a walker is required only when that walker is asked for.
      #
      # Raises a RuntimeError for any name other than 'simpletree', 'rexml'
      # or 'hpricot'.
      def [](name)
        key = name.to_s.downcase

        if key == 'simpletree'
          require 'html5/treewalkers/simpletree'
          return SimpleTree::TreeWalker
        end

        if key == 'rexml'
          require 'html5/treewalkers/rexml'
          return REXML::TreeWalker
        end

        if key == 'hpricot'
          require 'html5/treewalkers/hpricot'
          return Hpricot::TreeWalker
        end

        raise "Unknown TreeWalker #{name}"
      end

      alias :get_tree_walker :[]
    end
  end
end
|
data/lib/html5.rb
ADDED
data/parse.rb
ADDED
@@ -0,0 +1,217 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Parse a document to a simpletree tree, with optional profiling
|
4
|
+
|
5
|
+
$:.unshift File.dirname(__FILE__),'lib'
|
6
|
+
|
7
|
+
# Parse one document and print the result.
#
# opts:: option object; this method reads +treebuilder+, +output+,
#        +parsemethod+, +profile+ and +time+ from it and hands the whole
#        object on to printOutput.
# args:: command-line arguments. The last element names the input: an
#        http:// or https:// URL, '-' for standard input, or a file path.
#
# Writes a message to standard error and exits with status 1 when no input
# is named or when the named input cannot be opened.
def parse(opts, args)
  encoding = nil

  source = args[-1]
  unless source
    $stderr.write("No filename provided. Use -h for help\n")
    exit(1)
  end

  begin
    if source =~ %r{\Ahttps?://}
      require 'open-uri'
      f = URI.parse(source).open
      # Prefer the charset announced by the server over sniffing.
      encoding = f.charset
    elsif source == '-'
      f = $stdin
    else
      # File.open, not Kernel#open: a leading "|" in the argument must be
      # treated as part of a file name, never as a command to run.
      f = File.open(source)
    end
  rescue StandardError => e
    # Stop here; carrying on would hand the parser the file name itself as
    # if it were the document text.
    $stderr.write("Unable to open #{source}: #{e.message}\n")
    exit(1)
  end

  require 'html5/treebuilders'
  treebuilder = HTML5::TreeBuilders[opts.treebuilder]

  if opts.output == :xml
    require 'html5/liberalxmlparser'
    parser = HTML5::XHTMLParser.new(:tree => treebuilder)
  else
    require 'html5/html5parser'
    parser = HTML5::HTMLParser.new(:tree => treebuilder)
  end

  # Any parse method other than :parse additionally receives the container
  # element name, which this script fixes at 'div'.
  parse_args = opts.parsemethod == :parse ? [f, encoding] : [f, 'div', encoding]

  if opts.profile
    # NOTE(review): 'profiler' may not ship with recent Ruby releases -
    # confirm it is available before relying on -p.
    require 'profiler'
    Profiler__::start_profile
    parser.send(opts.parsemethod, *parse_args)
    Profiler__::stop_profile
    Profiler__::print_profile($stderr)
  elsif opts.time
    require 'time' # TODO: switch to benchmark
    t0 = Time.new
    document = parser.send(opts.parsemethod, *parse_args)
    t1 = Time.new
    printOutput(parser, document, opts)
    t2 = Time.new
    puts "\n\nRun took: %fs (plus %fs to print the output)" % [t1 - t0, t2 - t1]
  else
    document = parser.send(opts.parsemethod, *parse_args)
    printOutput(parser, document, opts)
  end
end
|
65
|
+
|
66
|
+
# Write the parsed +document+ to standard output in the format selected by
# opts.output, optionally preceded by the detected character encoding
# (opts.encoding) and followed by the list of parse errors (opts.error).
#
# An opts.output value other than :xml, :html, :hilite or :tree prints no
# document at all.
def printOutput(parser, document, opts)
  if opts.encoding
    puts "Encoding: #{parser.tokenizer.stream.char_encoding}"
  end

  mode = opts.output
  if mode == :xml
    print document
  elsif mode == :html
    require 'html5/treewalkers'
    walker = HTML5::TreeWalkers[opts.treebuilder].new(document)
    require 'html5/serializer'
    puts HTML5::HTMLSerializer.serialize(walker, opts.serializer)
  elsif mode == :hilite
    print document.hilite
  elsif mode == :tree
    # A single node is wrapped so that it can be dumped like a fragment list.
    fragments = document.respond_to?(:each) ? document : [document]
    fragments.each { |fragment| puts parser.tree.testSerializer(fragment) }
  end

  return unless opts.error

  # Each error is a (position, message) pair; position feeds both %i slots.
  report = parser.errors.map { |pos, message| ("Line %i Col %i" % pos) + " " + message }
  $stdout.write("\nParse errors:\n" + report.join("\n") + "\n")
end
|
92
|
+
|
93
|
+
require 'ostruct'
require 'optparse'

# Settings gathered from the command line, pre-loaded with their defaults.
options = OpenStruct.new(
  :profile     => false,
  :time        => false,
  :output      => :html,
  :treebuilder => 'simpletree',
  :error       => false,
  :encoding    => false,
  :parsemethod => :parse,
  :serializer  => {
    :encoding            => 'utf-8',
    :omit_optional_tags  => false,
    :inject_meta_charset => false
  }
)

cli = OptionParser.new

# Registers a switch whose value is stored under +key+ in options.serializer.
serializer_switch = lambda do |switch, description, key|
  cli.on(switch, description) { |value| options.serializer[key] = value }
end

cli.separator ""
cli.separator "Parse Options:"

cli.on("-b", "--treebuilder NAME") { |name| options.treebuilder = name }
cli.on("-f", "--fragment", "Parse as a fragment") { |_| options.parsemethod = :parse_fragment }

cli.separator ""
cli.separator "Filter Options:"

[
  ["--[no-]inject-meta-charset", "inject <meta charset>", :inject_meta_charset],
  ["--[no-]strip-whitespace", "strip unnecessary whitespace", :strip_whitespace],
  ["--[no-]sanitize", "escape unsafe tags", :sanitize]
].each { |spec| serializer_switch.call(*spec) }

cli.separator ""
cli.separator "Output Options:"

cli.on("--tree", "output as debug tree") { |_| options.output = :tree }

# XML output also switches the tree builder to rexml.
cli.on("-x", "--xml", "output as xml") do |_|
  options.output = :xml
  options.treebuilder = "rexml"
end

# --no-html leaves no output format selected.
cli.on("--[no-]html", "Output as html") { |html| options.output = (html ? :html : nil) }
cli.on("--hilite", "Output as formatted highlighted code.") { |_| options.output = :hilite }
cli.on("-e", "--error", "Print a list of parse errors") { |flag| options.error = flag }

cli.separator ""
cli.separator "Serialization Options:"

[
  ["--[no-]omit-optional-tags", "Omit optional tags", :omit_optional_tags],
  ["--[no-]quote-attr-values", "Quote attribute values", :quote_attr_values],
  ["--[no-]use-best-quote-char", "Use best quote character", :use_best_quote_char],
  ["--quote-char C", "Use specified quote character", :quote_char],
  ["--[no-]minimize-boolean-attributes", "Minimize boolean attributes", :minimize_boolean_attributes],
  ["--[no-]use-trailing-solidus", "Use trailing solidus", :use_trailing_solidus],
  ["--[no-]escape-lt-in-attrs", "Escape less than signs in attribute values", :escape_lt_in_attrs],
  ["--[no-]escape-rcdata", "Escape rcdata element values", :escape_rcdata]
].each { |spec| serializer_switch.call(*spec) }

cli.separator ""
cli.separator "Other Options:"

cli.on("-p", "--[no-]profile", "Profile the run") { |flag| options.profile = flag }
cli.on("-t", "--[no-]time", "Time the run") { |flag| options.time = flag }
cli.on("-c", "--[no-]encoding", "Print character encoding used") { |flag| options.encoding = flag }

cli.on_tail("-h", "--help", "Show this message") do
  puts cli
  exit
end

# parse! removes the recognised switches from ARGV; what is left names the input.
cli.parse!(ARGV)
parse options, ARGV
|
data/tests/preamble.rb
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
|
3
|
+
# Directory three levels above this file.
HTML5_BASE = File.dirname(File.dirname(File.dirname(File.expand_path(__FILE__))))

# Location of the test data: a 'testdata' directory under HTML5_BASE when one
# exists, otherwise 'testdata' next to the directory that holds this file.
# File.exist? is used because File.exists? is not available on every Ruby.
TESTDATA_DIR =
  if File.exist?(File.join(HTML5_BASE, 'testdata'))
    File.join(HTML5_BASE, 'testdata')
  else
    File.join(File.dirname(File.dirname(File.expand_path(__FILE__))), 'testdata')
  end
|
10
|
+
|
11
|
+
$:.unshift File.join(File.dirname(File.dirname(__FILE__)),'lib')
|
12
|
+
|
13
|
+
$:.unshift File.dirname(__FILE__)
|
14
|
+
|
15
|
+
# List the files whose names contain a dot directly inside +subdirectory+
# of TESTDATA_DIR. Returns an empty array when nothing matches.
def html5_test_files(subdirectory)
  pattern = File.join(TESTDATA_DIR, subdirectory, '*.*')
  Dir.glob(pattern)
end
|
18
|
+
|
19
|
+
begin
  require 'rubygems'
  require 'json'
rescue LoadError
  # Minimal stand-in, defined only when the json library cannot be loaded.
  class JSON
    # Convert a JSON document to Ruby objects by rewriting it into Ruby
    # literal syntax and evaluating the result. The argument is left
    # untouched, so frozen strings are accepted.
    #
    # SECURITY: this evaluates its input as Ruby code. It is acceptable only
    # for trusted fixture files; never pass it data from an external source.
    def self.parse json
      ruby_source = json.
        gsub(/\$/, "\\$").      # keep "#$x" in a string from interpolating
        gsub(/"\s*:/, '"=>').   # "key": value  ->  "key"=> value
        gsub(/\\u[0-9a-fA-F]{4}/) { |x| [x[2..-1].to_i(16)].pack('U') }
      null = nil # lets a bare JSON null evaluate to nil inside eval
      eval ruby_source
    end
  end
end
|
33
|
+
|
34
|
+
module HTML5
  # Helpers shared by the test cases.
  module TestSupport
    # Convert the output of str(document) to the format used in the testcases:
    # the first line is dropped, and every remaining line that is longer than
    # two characters and starts with "|" loses its first three characters.
    # An empty dump gives an empty string.
    def convertTreeDump(treedump)
      body = treedump.split(/\n/)[1..-1] || []
      body.map { |line| (line.length > 2 && line[0, 1] == '|') ? line[3..-1] : line }.join("\n")
    end

    # Sort each run of consecutive, equally indented "name=value" lines so
    # that attribute order does not affect comparisons.
    def sortattrs(output)
      output.gsub(/^(\s+)\w+=.*(\n\1\w+=.*)+/) do |match|
        match.split("\n").sort.join("\n")
      end
    end

    # Reads a test data file made of "#heading" lines followed by content and
    # yields one record per test. A record is an array holding the content of
    # each requested section, in the order the sections were requested; a
    # section that is absent from a record is nil.
    class TestData
      include Enumerable

      # filename:: path of the data file
      # sections:: heading names to collect; the first one starts a new record
      def initialize(filename, sections)
        @filename = filename
        @sections = sections
      end

      # Yield each record in turn. The file is opened for the duration of the
      # iteration only, so the object can be iterated more than once and no
      # handle is left open.
      def each
        data = {}
        key = nil
        File.open(@filename) do |file|
          file.each_line do |line|
            heading = line[1..-2] # text between the leading "#" and the newline
            if line[0, 1] == '#' && @sections.include?(heading)
              if data.any? && heading == @sections[0]
                data[key].chomp! # Remove trailing newline
                yield normaliseOutput(data)
                data = {}
              end
              key = heading
              data[key] = ''.dup
            elsif key
              data[key] << line
            end
          end
        end
        # Nothing collected (empty file, or no known heading): yield nothing
        # rather than a record made only of nils.
        yield normaliseOutput(data) unless data.empty?
      end

      # Strip one trailing newline from every collected section and return
      # the sections in the requested order.
      def normaliseOutput(data)
        data.each_value { |text| text.chomp! }
        @sections.map { |heading| data[heading] }
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
|
3
|
+
require 'html5/inputstream'
|
4
|
+
|
5
|
+
# Encoding detection tests for HTML5::HTMLInputStream.
class Html5EncodingTestCase < Test::Unit::TestCase
  include HTML5
  include TestSupport

  # The chardet test is defined only when UniversalDetector can be loaded.
  # The rescue belongs to this begin block (not to the method body) so that a
  # missing library skips the test instead of aborting the whole file.
  begin
    require 'rubygems'
    require 'UniversalDetector'

    def test_chardet
      path = File.join(TESTDATA_DIR, 'encoding', 'chardet', 'test_big5.txt')
      # Block form closes the file once the assertion has run.
      File.open(path, 'r') do |file|
        stream = HTML5::HTMLInputStream.new(file, :chardet => true)
        assert_equal 'big5', stream.char_encoding.downcase
      end
    end
  rescue LoadError
    puts "chardet not found, skipping chardet tests"
  end

  # One test method per record of every file in the 'encoding' test data
  # directory, named test_<file name without '.dat' and '-'>_<record number>.
  html5_test_files('encoding').each do |test_file|
    test_name = File.basename(test_file).sub('.dat', '').tr('-', '')

    TestData.new(test_file, %w(data encoding)).
      each_with_index do |(input, encoding), index|

      define_method 'test_%s_%d' % [ test_name, index + 1 ] do
        stream = HTML5::HTMLInputStream.new(input, :chardet => false)
        assert_equal encoding.downcase, stream.char_encoding.downcase, input
      end
    end
  end

end
|
data/tests/test_lxp.rb
ADDED
@@ -0,0 +1,263 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
|
3
|
+
require 'html5/liberalxmlparser'
|
4
|
+
|
5
|
+
# Matches a start tag that carries at least one double-quoted attribute.
# Captures: (1) the element name with any whitespace after it, (2) the
# attribute list, (3) the "/" of a self-closing tag or an empty string.
XMLELEM = /<(\w+\s*)((?:[-:\w]+="[^"]*"\s*)+)(\/?)>/

# Parse +input+ with +parser+ (element and attribute case preserved) and
# assert on the serialized root, with single quotes turned into double quotes.
#
# With +expected+ given, the output must equal it exactly. Without it, the
# output must equal +input+ itself once attributes are sorted on both sides
# and decimal character references in +input+ are replaced by the characters
# they name.
def assert_xml_equal(input, expected=nil, parser=HTML5::XMLParser)
  # Rewrites every tag matched by XMLELEM with its attributes in sorted order.
  with_sorted_attrs = lambda do |xml|
    xml.gsub(XMLELEM) { "<#{$1 + $2.split.sort.join(' ') + $3}>" }
  end

  document = parser.parse(input.chomp, :lowercase_attr_name => false, :lowercase_element_name => false).root
  actual = document.to_s.gsub(/'/, '"')

  if expected
    assert_equal(expected, actual)
  else
    wanted = with_sorted_attrs.call(input.chomp)
    wanted = wanted.gsub(/&#(\d+);/) { [$1.to_i].pack('U') }
    assert_equal(wanted, with_sorted_attrs.call(actual))
  end
end

# Same check as assert_xml_equal, with HTML5::XHTMLParser as the default parser.
def assert_xhtml_equal(input, expected=nil, parser=HTML5::XHTMLParser)
  assert_xml_equal(input, expected, parser)
end
|
23
|
+
|
24
|
+
# XHTMLParser tests on input that has no explicit html/head/body elements.
class BasicXhtml5Test < Test::Unit::TestCase

  # Mis-nested </b></i> end tags come out properly nested, and the missing
  # html, head and body elements are supplied.
  def test_title_body_mismatched_close
    assert_xhtml_equal(
      '<title>Xhtml</title><b><i>content</b></i>',
      '<html xmlns="http://www.w3.org/1999/xhtml">' +
      '<head><title>Xhtml</title></head>' +
      '<body><b><i>content</i></b></body>' +
      '</html>')
  end

  # A named character reference in the input comes out as the character
  # itself (U+00F1). The input must spell the reference out; a literal
  # character there would not exercise named references at all.
  def test_title_body_named_charref
    assert_xhtml_equal(
      '<title>ntilde</title>A &ntilde; B',
      '<html xmlns="http://www.w3.org/1999/xhtml">' +
      '<head><title>ntilde</title></head>' +
      '<body>A '+ [0xF1].pack('U') + ' B</body>' +
      '</html>')
  end
end
|
44
|
+
|
45
|
+
# Small XML inputs run through assert_xml_equal with its default parser.
class BasicXmlTest < Test::Unit::TestCase

  # No expected value: the serialized output is compared with the input.
  def test_comment
    assert_xml_equal('<x><!-- foo --></x>')
  end

  # A CDATA section is expected to come out as plain text.
  def test_cdata
    assert_xml_equal('<x><![CDATA[foo]]></x>', '<x>foo</x>')
  end

  def test_simple_text
    assert_xml_equal('<p>foo</p>', '<p>foo</p>')
  end

  # A missing end tag is expected to be supplied.
  def test_optional_close
    assert_xml_equal('<p>foo', '<p>foo</p>')
  end

  # Mis-nested end tags are expected to come out properly nested.
  def test_html_mismatched
    assert_xml_equal('<b><i>foo</b></i>', '<b><i>foo</i></b>')
  end
end
|
67
|
+
|
68
|
+
# OPML documents: mixed-case names must survive, and a bare "&" is repaired.
class OpmlTest < Test::Unit::TestCase

  # No expected value: the output is compared with the input, so the mixed
  # case of ownerName has to be preserved.
  def test_mixedCaseElement
    assert_xml_equal(
      '<opml version="1.0">' +
      '<head><ownerName>Dave Winer</ownerName></head>' +
      '</opml>')
  end

  # As above, for the mixed-case attribute isComment.
  def test_mixedCaseAttribute
    assert_xml_equal(
      '<opml version="1.0">' +
      '<body><outline isComment="true"/></body>' +
      '</opml>')
  end

  # The input holds a bare "&", which is not well-formed XML; the expected
  # output carries it escaped as "&amp;".
  def test_malformed
    assert_xml_equal(
      '<opml version="1.0">' +
      '<body><outline text="Odds & Ends"/></body>' +
      '</opml>',
      '<opml version="1.0">' +
      '<body><outline text="Odds &amp; Ends"/></body>' +
      '</opml>')
  end
end
|
94
|
+
|
95
|
+
class XhtmlTest < Test::Unit::TestCase
|
96
|
+
|
97
|
+
def test_mathml
|
98
|
+
assert_xhtml_equal <<EOX
|
99
|
+
<html xmlns="http://www.w3.org/1999/xhtml">
|
100
|
+
<head><title>MathML</title></head>
|
101
|
+
<body>
|
102
|
+
<math xmlns="http://www.w3.org/1998/Math/MathML">
|
103
|
+
<mrow>
|
104
|
+
<mi>x</mi>
|
105
|
+
<mo>=</mo>
|
106
|
+
|
107
|
+
<mfrac>
|
108
|
+
<mrow>
|
109
|
+
<mrow>
|
110
|
+
<mo>-</mo>
|
111
|
+
<mi>b</mi>
|
112
|
+
</mrow>
|
113
|
+
<mo>±</mo>
|
114
|
+
<msqrt>
|
115
|
+
|
116
|
+
<mrow>
|
117
|
+
<msup>
|
118
|
+
<mi>b</mi>
|
119
|
+
<mn>2</mn>
|
120
|
+
</msup>
|
121
|
+
<mo>-</mo>
|
122
|
+
<mrow>
|
123
|
+
|
124
|
+
<mn>4</mn>
|
125
|
+
<mo>⁢</mo>
|
126
|
+
<mi>a</mi>
|
127
|
+
<mo>⁢</mo>
|
128
|
+
<mi>c</mi>
|
129
|
+
</mrow>
|
130
|
+
</mrow>
|
131
|
+
|
132
|
+
</msqrt>
|
133
|
+
</mrow>
|
134
|
+
<mrow>
|
135
|
+
<mn>2</mn>
|
136
|
+
<mo>⁢</mo>
|
137
|
+
<mi>a</mi>
|
138
|
+
</mrow>
|
139
|
+
</mfrac>
|
140
|
+
|
141
|
+
</mrow>
|
142
|
+
</math>
|
143
|
+
</body></html>
|
144
|
+
EOX
|
145
|
+
end
|
146
|
+
|
147
|
+
def test_svg
|
148
|
+
assert_xhtml_equal <<EOX
|
149
|
+
<html xmlns="http://www.w3.org/1999/xhtml">
|
150
|
+
<head><title>SVG</title></head>
|
151
|
+
<body>
|
152
|
+
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
|
153
|
+
<path d="M38,38c0-12,24-15,23-2c0,9-16,13-16,23v7h11v-4c0-9,17-12,17-27
|
154
|
+
c-2-22-45-22-45,3zM45,70h11v11h-11z" fill="#371">
|
155
|
+
</path>
|
156
|
+
<circle cx="50" cy="50" r="45" fill="none" stroke="#371" stroke-width="10">
|
157
|
+
</circle>
|
158
|
+
|
159
|
+
</svg>
|
160
|
+
</body></html>
|
161
|
+
EOX
|
162
|
+
end
|
163
|
+
|
164
|
+
def test_xlink
|
165
|
+
assert_xhtml_equal <<EOX
|
166
|
+
<html xmlns="http://www.w3.org/1999/xhtml">
|
167
|
+
<head><title>XLINK</title></head>
|
168
|
+
<body>
|
169
|
+
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
|
170
|
+
<defs xmlns:l="http://www.w3.org/1999/xlink">
|
171
|
+
<radialGradient id="s1" fx=".4" fy=".2" r=".7">
|
172
|
+
<stop stop-color="#FE8"/>
|
173
|
+
<stop stop-color="#D70" offset="1"/>
|
174
|
+
</radialGradient>
|
175
|
+
<radialGradient id="s2" fx=".8" fy=".5" l:href="#s1"/>
|
176
|
+
<radialGradient id="s3" fx=".5" fy=".9" l:href="#s1"/>
|
177
|
+
<radialGradient id="s4" fx=".1" fy=".5" l:href="#s1"/>
|
178
|
+
</defs>
|
179
|
+
<g stroke="#940">
|
180
|
+
<path d="M73,29c-37-40-62-24-52,4l6-7c-8-16,7-26,42,9z" fill="url(#s1)"/>
|
181
|
+
<path d="M47,8c33-16,48,21,9,47l-6-5c38-27,20-44,5-37z" fill="url(#s2)"/>
|
182
|
+
<path d="M77,32c22,30,10,57-39,51l-1-8c3,3,67,5,36-36z" fill="url(#s3)"/>
|
183
|
+
|
184
|
+
<path d="M58,84c-4,20-38-4-8-24l-6-5c-36,43,15,56,23,27z" fill="url(#s4)"/>
|
185
|
+
<path d="M40,14c-40,37-37,52-9,68l1-8c-16-13-29-21,16-56z" fill="url(#s1)"/>
|
186
|
+
<path d="M31,33c19,23,20,7,35,41l-9,1.7c-4-19-8-14-31-37z" fill="url(#s2)"/>
|
187
|
+
</g>
|
188
|
+
</svg>
|
189
|
+
</body></html>
|
190
|
+
EOX
|
191
|
+
end
|
192
|
+
|
193
|
+
def test_br
|
194
|
+
assert_xhtml_equal <<EOX1
|
195
|
+
<html xmlns="http://www.w3.org/1999/xhtml">
|
196
|
+
<head><title>BR</title></head>
|
197
|
+
<body>
|
198
|
+
<br/>
|
199
|
+
</body></html>
|
200
|
+
EOX1
|
201
|
+
end
|
202
|
+
|
203
|
+
def test_strong
|
204
|
+
assert_xhtml_equal <<EOX
|
205
|
+
<html xmlns="http://www.w3.org/1999/xhtml">
|
206
|
+
<head><title>STRONG</title></head>
|
207
|
+
<body>
|
208
|
+
<strong></strong>
|
209
|
+
</body></html>
|
210
|
+
EOX
|
211
|
+
end
|
212
|
+
|
213
|
+
def test_script
|
214
|
+
assert_xhtml_equal <<EOX
|
215
|
+
<html xmlns="http://www.w3.org/1999/xhtml">
|
216
|
+
<head><title>SCRIPT</title></head>
|
217
|
+
<body>
|
218
|
+
<script>1 < 2 & 3</script>
|
219
|
+
</body></html>
|
220
|
+
EOX
|
221
|
+
end
|
222
|
+
|
223
|
+
def test_script_src
|
224
|
+
assert_xhtml_equal <<EOX1, <<EOX2.strip
|
225
|
+
<html xmlns="http://www.w3.org/1999/xhtml">
|
226
|
+
<head><title>SCRIPT</title><script src="http://example.com"/></head>
|
227
|
+
<body>
|
228
|
+
<script>1 < 2 & 3</script>
|
229
|
+
</body></html>
|
230
|
+
EOX1
|
231
|
+
<html xmlns="http://www.w3.org/1999/xhtml">
|
232
|
+
<head><title>SCRIPT</title><script src="http://example.com"></script></head>
|
233
|
+
<body>
|
234
|
+
<script>1 < 2 & 3</script>
|
235
|
+
</body></html>
|
236
|
+
EOX2
|
237
|
+
end
|
238
|
+
|
239
|
+
def test_title
|
240
|
+
assert_xhtml_equal <<EOX
|
241
|
+
<html xmlns="http://www.w3.org/1999/xhtml">
|
242
|
+
<head><title>1 < 2 & 3</title></head>
|
243
|
+
<body>
|
244
|
+
</body></html>
|
245
|
+
EOX
|
246
|
+
end
|
247
|
+
|
248
|
+
def test_prolog
|
249
|
+
assert_xhtml_equal <<EOX1, <<EOX2.strip
|
250
|
+
<?xml version="1.0" encoding="UTF-8" ?>
|
251
|
+
<html xmlns="http://www.w3.org/1999/xhtml">
|
252
|
+
<head><title>PROLOG</title></head>
|
253
|
+
<body>
|
254
|
+
</body></html>
|
255
|
+
EOX1
|
256
|
+
<html xmlns="http://www.w3.org/1999/xhtml">
|
257
|
+
<head><title>PROLOG</title></head>
|
258
|
+
<body>
|
259
|
+
</body></html>
|
260
|
+
EOX2
|
261
|
+
end
|
262
|
+
|
263
|
+
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'preamble')
|
2
|
+
|
3
|
+
require 'html5/treebuilders'
|
4
|
+
require 'html5/html5parser'
|
5
|
+
|
6
|
+
|
7
|
+
# Tree builders exercised by the parser tests. hpricot joins the list only
# when the library can be loaded.
$tree_types_to_test = %w(simpletree rexml)

begin
  require 'hpricot'
  $tree_types_to_test << 'hpricot'
rescue LoadError
  # hpricot is optional; carry on with the built-in tree builders
end

# Holds '-p' when that switch was on the command line (and removes it from
# ARGV); nil otherwise.
$CHECK_PARSER_ERRORS = ARGV.delete('-p') # TODO

puts "Testing tree builders: #{$tree_types_to_test.join(', ')}"
|
18
|
+
|
19
|
+
|
20
|
+
# Tree-construction tests: one test method is generated for every record of
# every data file, once per tree builder listed in $tree_types_to_test.
class Html5ParserTestCase < Test::Unit::TestCase
  include HTML5
  include TestSupport

  html5_test_files('tree-construction').each do |test_file|

    test_name = File.basename(test_file).sub('.dat', '')

    TestData.new(test_file, %w(data errors document-fragment document)).
      each_with_index do |(input, errors, inner_html, expected), index|

      errors = errors.split("\n")
      # Remove the "| " gutter that follows each newline, then the first two
      # characters of the dump.
      expected = expected.gsub("\n| ","\n")[2..-1]

      $tree_types_to_test.each do |tree_name|
        define_method 'test_%s_%d_%s' % [ test_name, index + 1, tree_name ] do

          parser = HTMLParser.new(:tree => TreeBuilders[tree_name])

          # A record with a document-fragment section is parsed as a fragment
          # inside the element that section names.
          if inner_html
            parser.parse_fragment(input, inner_html)
          else
            parser.parse(input)
          end

          actual_output = convertTreeDump(parser.tree.testSerializer(parser.tree.document))

          assert_equal sortattrs(expected), sortattrs(actual_output), [
            '', 'Input:', input,
            '', 'Expected:', expected,
            '', 'Received:', actual_output
          ].join("\n")

          actual_errors = parser.errors.map do |(line, col), message|
            'Line: %i Col: %i %s' % [line, col, message]
          end
          # Only the number of errors is compared; the texts appear in the
          # failure message for diagnosis.
          assert_equal errors.length, parser.errors.length, [
            '', 'Input', input,
            '', "Expected errors (#{errors.length}):", errors.join("\n"),
            '', "Actual errors (#{actual_errors.length}):",
            actual_errors.join("\n")
          ].join("\n")

        end
      end
    end
  end

end
|