ox 1.2.15 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of ox might be problematic. Click here for more details.
- data/{README.rdoc → README.md} +92 -64
- data/ext/ox/dump.c +7 -7
- data/ext/ox/ox.c +43 -10
- data/ext/ox/ox.h +11 -0
- data/ext/ox/sax.c +758 -0
- data/lib/ox.rb +1 -0
- data/lib/ox/sax.rb +72 -0
- data/lib/ox/version.rb +1 -1
- data/test/func.rb +24 -1
- data/test/perf_sax.rb +233 -0
- data/test/sax_test.rb +381 -0
- metadata +19 -7
data/lib/ox.rb
CHANGED
data/lib/ox/sax.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
module Ox
|
2
|
+
# A SAX style parse handler. The Ox::Sax handler class should be subclassed
|
3
|
+
# and then used with the Ox.sax_parse() method. The Sax methods will then be
|
4
|
+
# called as the file is parsed. This is best suited for very large files or
|
5
|
+
# IO streams.<p/>
|
6
|
+
# @example
|
7
|
+
#
|
8
|
+
# require 'ox'
|
9
|
+
#
|
10
|
+
# class MySax < ::Ox::Sax
|
11
|
+
# def initialize()
|
12
|
+
# @element_names = []
|
13
|
+
# end
|
14
|
+
#
|
15
|
+
# def start_element(name, attrs)
|
16
|
+
# @element_names << name
|
17
|
+
# end
|
18
|
+
# end
|
19
|
+
#
|
20
|
+
# any = MySax.new()
|
21
|
+
# File.open('any.xml', 'r') do |f|
|
22
|
+
# Ox.sax_parse(any, f)
|
23
|
+
# end
|
24
|
+
#
|
25
|
+
# To make a callback active while parsing, the corresponding method should
|
26
|
+
# be made public in the subclasses. If the methods remain private they will
|
27
|
+
# not be called during parsing.
|
28
|
+
#
|
29
|
+
# def instruct(target, attrs); end
|
30
|
+
# def doctype(value); end
|
31
|
+
# def comment(value); end
|
32
|
+
# def cdata(value); end
|
33
|
+
# def text(value); end
|
34
|
+
# def start_element(name, attrs); end
|
35
|
+
# def end_element(name); end
|
36
|
+
#
|
37
|
+
class Sax
|
38
|
+
# Create a new instance of the Sax handler class.
|
39
|
+
def initialize()
|
40
|
+
end
|
41
|
+
|
42
|
+
# To make a callback active while parsing, the corresponding method
|
43
|
+
# should be made public in the subclasses. If the methods remain private
|
44
|
+
# they will not be called during parsing.
|
45
|
+
private
|
46
|
+
|
47
|
+
def instruct(target, attrs)
|
48
|
+
end
|
49
|
+
|
50
|
+
def doctype(value)
|
51
|
+
end
|
52
|
+
|
53
|
+
def comment(value)
|
54
|
+
end
|
55
|
+
|
56
|
+
def cdata(value)
|
57
|
+
end
|
58
|
+
|
59
|
+
def text(value)
|
60
|
+
end
|
61
|
+
|
62
|
+
def start_element(name, attrs)
|
63
|
+
end
|
64
|
+
|
65
|
+
def end_element(name)
|
66
|
+
end
|
67
|
+
|
68
|
+
def error(message, line, column)
|
69
|
+
end
|
70
|
+
|
71
|
+
end # Sax
|
72
|
+
end # Ox
|
data/lib/ox/version.rb
CHANGED
data/test/func.rb
CHANGED
@@ -5,7 +5,7 @@ $: << '../lib'
|
|
5
5
|
$: << '../ext'
|
6
6
|
|
7
7
|
if __FILE__ == $0
|
8
|
-
|
8
|
+
while (i = ARGV.index('-I'))
|
9
9
|
x,path = ARGV.slice!(i, 2)
|
10
10
|
$: << path
|
11
11
|
end
|
@@ -333,6 +333,29 @@ class Func < ::Test::Unit::TestCase
|
|
333
333
|
loaded
|
334
334
|
end
|
335
335
|
|
336
|
+
def test_nameerror
|
337
|
+
begin
|
338
|
+
"x".foo
|
339
|
+
rescue Exception => e
|
340
|
+
xml = Ox.dump(e, :effort => :tolerant)
|
341
|
+
o = Ox.load(xml, mode: :object)
|
342
|
+
xml2 = Ox.dump(o, :effort => :tolerant)
|
343
|
+
assert_equal(xml, xml2)
|
344
|
+
end
|
345
|
+
end
|
346
|
+
|
347
|
+
def test_mutex
|
348
|
+
# Mutex can not be serialized but it should not raise an exception.
|
349
|
+
xml = Ox.dump(Mutex.new, :indent => 2, :effort => :tolerant)
|
350
|
+
assert_equal(%{<z/>
|
351
|
+
}, xml)
|
352
|
+
xml = Ox.dump(Bag.new(:@x => Mutex.new), :indent => 2, :effort => :tolerant)
|
353
|
+
assert_equal(%{<o c="Bag">
|
354
|
+
<z a="@x"/>
|
355
|
+
</o>
|
356
|
+
}, xml)
|
357
|
+
end
|
358
|
+
|
336
359
|
def test_encoding
|
337
360
|
if RUBY_VERSION.start_with?('1.8')
|
338
361
|
assert(true)
|
data/test/perf_sax.rb
ADDED
@@ -0,0 +1,233 @@
|
|
1
|
+
#!/usr/bin/env ruby -wW1
|
2
|
+
|
3
|
+
$: << '.'
|
4
|
+
$: << '..'
|
5
|
+
$: << '../lib'
|
6
|
+
$: << '../ext'
|
7
|
+
|
8
|
+
if __FILE__ == $0
|
9
|
+
while (i = ARGV.index('-I'))
|
10
|
+
x,path = ARGV.slice!(i, 2)
|
11
|
+
$: << path
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
require 'optparse'
|
16
|
+
require 'ox'
|
17
|
+
require 'sample'
|
18
|
+
require 'files'
|
19
|
+
begin
|
20
|
+
require 'nokogiri'
|
21
|
+
rescue Exception => e
|
22
|
+
end
|
23
|
+
begin
|
24
|
+
require 'libxml'
|
25
|
+
rescue Exception => e
|
26
|
+
end
|
27
|
+
|
28
|
+
$verbose = 0
|
29
|
+
$ox_only = false
|
30
|
+
$all_cbs = false
|
31
|
+
$filename = nil # nil indicates a new file named perf.xml will be created and used
|
32
|
+
$filesize = 1000 # KBytes
|
33
|
+
$iter = 100
|
34
|
+
|
35
|
+
opts = OptionParser.new
|
36
|
+
opts.on("-v", "increase verbosity") { $verbose += 1 }
|
37
|
+
opts.on("-x", "ox only") { $ox_only = true }
|
38
|
+
opts.on("-a", "all callbacks") { $all_cbs = true }
|
39
|
+
opts.on("-f", "--file [String]", String, "filename") { |f| $filename = f }
|
40
|
+
opts.on("-i", "--iterations [Int]", Integer, "iterations") { |i| $iter = i }
|
41
|
+
opts.on("-s", "--size [Int]", Integer, "file size in KBytes") { |s| $filesize = s }
|
42
|
+
opts.on("-h", "--help", "Show this display") { puts opts; Process.exit!(0) }
|
43
|
+
rest = opts.parse(ARGV)
|
44
|
+
|
45
|
+
$xml_str = nil
|
46
|
+
$ox_time = 0
|
47
|
+
$no_time = 0
|
48
|
+
$lx_time = 0
|
49
|
+
|
50
|
+
# size is in Kbytes
|
51
|
+
def create_file(filename, size)
|
52
|
+
head = %{<?xml version="1.0"?>
|
53
|
+
<?ox version="1.0" mode="object" circular="no" xsd_date="no"?>
|
54
|
+
<!DOCTYPE table PUBLIC "-//ox//DTD TABLE 1.0//EN" "http://www.ohler.com/DTDs/TestTable-1.0.dtd">
|
55
|
+
<table>
|
56
|
+
}
|
57
|
+
tail = %{</table>
|
58
|
+
}
|
59
|
+
row = %{ <!-- row %08d element -->
|
60
|
+
<row id="%08d">
|
61
|
+
<cell id="A" type="Fixnum">1234</cell>
|
62
|
+
<cell id="B" type="String">A string.</cell>
|
63
|
+
<cell id="C" type="String">This is a longer string that stretches over a larger number of characters.</cell>
|
64
|
+
<cell id="D" type="Float">-12.345</cell>
|
65
|
+
<cell id="E" type="Date">2011-09-18 23:07:26 +0900</cell>
|
66
|
+
<cell id="F" type="Image"><![CDATA[xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00]]></cell>
|
67
|
+
</row>
|
68
|
+
}
|
69
|
+
cnt = (size * 1000 - head.size - tail.size) / row.size
|
70
|
+
File.open(filename, "w") do |f|
|
71
|
+
f.write(head)
|
72
|
+
cnt.times do |i|
|
73
|
+
f.write(row % [i,i])
|
74
|
+
end
|
75
|
+
f.write(tail)
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
class OxSax < ::Ox::Sax
|
80
|
+
def start_element(name, attrs); end
|
81
|
+
def error(message, line, column); puts message; end
|
82
|
+
end
|
83
|
+
|
84
|
+
class OxAllSax < OxSax
|
85
|
+
def end_element(name); end
|
86
|
+
def instruct(target, attrs); end
|
87
|
+
def doctype(value); end
|
88
|
+
def comment(value); end
|
89
|
+
def cdata(value); end
|
90
|
+
def text(value); end
|
91
|
+
end
|
92
|
+
|
93
|
+
unless defined?(::Nokogiri).nil?
|
94
|
+
class NoSax < Nokogiri::XML::SAX::Document
|
95
|
+
def start_element(name, attrs = []); end
|
96
|
+
def error(message); puts message; end
|
97
|
+
def warning(message); puts message; end
|
98
|
+
end
|
99
|
+
class NoAllSax < NoSax
|
100
|
+
def characters(text); end
|
101
|
+
def cdata_block(string); end
|
102
|
+
def comment(string); end
|
103
|
+
def end_document(); end
|
104
|
+
def end_element(name); end
|
105
|
+
def start_document(); end
|
106
|
+
def xmldecl(version, encoding, standalone); end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
unless defined?(::LibXML).nil?
|
111
|
+
class LxSax
|
112
|
+
include LibXML::XML::SaxParser::Callbacks
|
113
|
+
def on_start_element(element, attributes); end
|
114
|
+
end
|
115
|
+
class LxAllSax < LxSax
|
116
|
+
def on_cdata_block(cdata); end
|
117
|
+
def on_characters(chars); end
|
118
|
+
def on_comment(msg); end
|
119
|
+
def on_end_document(); end
|
120
|
+
def on_end_element(element); end
|
121
|
+
def on_end_element_ns(name, prefix, uri); end
|
122
|
+
def on_error(msg); end
|
123
|
+
def on_external_subset(name, external_id, system_id); end
|
124
|
+
def on_has_external_subset(); end
|
125
|
+
def on_has_internal_subset(); end
|
126
|
+
def on_internal_subset(name, external_id, system_id); end
|
127
|
+
def on_is_standalone(); end
|
128
|
+
def on_processing_instruction(target, data); end
|
129
|
+
def on_reference(name); end
|
130
|
+
def on_start_document(); end
|
131
|
+
def on_start_element_ns(name, attributes, prefix, uri, namespaces); end
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
def perf_stringio()
|
136
|
+
start = Time.now
|
137
|
+
handler = $all_cbs ? OxAllSax.new() : OxSax.new()
|
138
|
+
$iter.times do
|
139
|
+
input = StringIO.new($xml_str)
|
140
|
+
Ox.sax_parse(handler, input)
|
141
|
+
input.close
|
142
|
+
end
|
143
|
+
$ox_time = Time.now - start
|
144
|
+
puts "StringIO SAX parsing #{$iter} times with Ox took #{$ox_time} seconds."
|
145
|
+
|
146
|
+
return if $ox_only
|
147
|
+
|
148
|
+
unless defined?(::Nokogiri).nil?
|
149
|
+
handler = Nokogiri::XML::SAX::Parser.new($all_cbs ? NoAllSax.new() : NoSax.new())
|
150
|
+
start = Time.now
|
151
|
+
$iter.times do
|
152
|
+
input = StringIO.new($xml_str)
|
153
|
+
handler.parse(input)
|
154
|
+
input.close
|
155
|
+
end
|
156
|
+
$no_time = Time.now - start
|
157
|
+
puts "StringIO SAX parsing #{$iter} times with Nokogiri took #{$no_time} seconds."
|
158
|
+
end
|
159
|
+
|
160
|
+
unless defined?(::LibXML).nil?
|
161
|
+
start = Time.now
|
162
|
+
$iter.times do
|
163
|
+
input = StringIO.new($xml_str)
|
164
|
+
parser = LibXML::XML::SaxParser.io(input)
|
165
|
+
parser.callbacks = $all_cbs ? LxAllSax.new() : LxSax.new()
|
166
|
+
parser.parse
|
167
|
+
input.close
|
168
|
+
end
|
169
|
+
$lx_time = Time.now - start
|
170
|
+
puts "StringIO SAX parsing #{$iter} times with LibXML took #{$lx_time} seconds."
|
171
|
+
end
|
172
|
+
|
173
|
+
puts "\n"
|
174
|
+
puts ">>> Ox is %0.1f faster than Nokogiri SAX parsing using StringIO." % [$no_time/$ox_time] unless defined?(::Nokogiri).nil?
|
175
|
+
puts ">>> Ox is %0.1f faster than LibXML SAX parsing using StringIO." % [$lx_time/$ox_time] unless defined?(::LibXML).nil?
|
176
|
+
puts "\n"
|
177
|
+
end
|
178
|
+
|
179
|
+
def perf_fileio()
|
180
|
+
puts "\n"
|
181
|
+
puts "A #{$filesize} KByte XML file was parsed #{$iter} for this test."
|
182
|
+
puts "\n"
|
183
|
+
start = Time.now
|
184
|
+
handler = $all_cbs ? OxAllSax.new() : OxSax.new()
|
185
|
+
$iter.times do
|
186
|
+
input = IO.open(IO.sysopen($filename))
|
187
|
+
Ox.sax_parse(handler, input)
|
188
|
+
input.close
|
189
|
+
end
|
190
|
+
$ox_time = Time.now - start
|
191
|
+
puts "File IO SAX parsing #{$iter} times with Ox took #{$ox_time} seconds."
|
192
|
+
|
193
|
+
return if $ox_only
|
194
|
+
|
195
|
+
unless defined?(::Nokogiri).nil?
|
196
|
+
handler = Nokogiri::XML::SAX::Parser.new($all_cbs ? NoAllSax.new() : NoSax.new())
|
197
|
+
start = Time.now
|
198
|
+
$iter.times do
|
199
|
+
input = IO.open(IO.sysopen($filename))
|
200
|
+
handler.parse(input)
|
201
|
+
input.close
|
202
|
+
end
|
203
|
+
$no_time = Time.now - start
|
204
|
+
puts "File IO SAX parsing #{$iter} times with Nokogiri took #{$no_time} seconds."
|
205
|
+
end
|
206
|
+
|
207
|
+
unless defined?(::LibXML).nil?
|
208
|
+
start = Time.now
|
209
|
+
$iter.times do
|
210
|
+
input = IO.open(IO.sysopen($filename))
|
211
|
+
parser = LibXML::XML::SaxParser.io(input)
|
212
|
+
parser.callbacks = $all_cbs ? LxAllSax.new() : LxSax.new()
|
213
|
+
parser.parse
|
214
|
+
input.close
|
215
|
+
end
|
216
|
+
$lx_time = Time.now - start
|
217
|
+
puts "File IO SAX parsing #{$iter} times with LibXML took #{$lx_time} seconds."
|
218
|
+
end
|
219
|
+
|
220
|
+
puts "\n"
|
221
|
+
puts ">>> Ox is %0.1f faster than Nokogiri SAX parsing using file IO." % [$no_time/$ox_time] unless defined?(::Nokogiri).nil?
|
222
|
+
puts ">>> Ox is %0.1f faster than LibXML SAX parsing using file IO." % [$lx_time/$ox_time] unless defined?(::LibXML).nil?
|
223
|
+
puts "\n"
|
224
|
+
end
|
225
|
+
|
226
|
+
if $filename.nil?
|
227
|
+
create_file('perf.xml', $filesize)
|
228
|
+
$filename = 'perf.xml'
|
229
|
+
end
|
230
|
+
$xml_str = File.read($filename)
|
231
|
+
|
232
|
+
# perf_stringio()
|
233
|
+
perf_fileio()
|
data/test/sax_test.rb
ADDED
@@ -0,0 +1,381 @@
|
|
1
|
+
#!/usr/bin/env ruby -wW1
|
2
|
+
# encoding: UTF-8
|
3
|
+
|
4
|
+
$: << '../lib'
|
5
|
+
$: << '../ext'
|
6
|
+
|
7
|
+
if __FILE__ == $0
|
8
|
+
while (i = ARGV.index('-I'))
|
9
|
+
x,path = ARGV.slice!(i, 2)
|
10
|
+
$: << path
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
require 'stringio'
|
15
|
+
require 'test/unit'
|
16
|
+
require 'optparse'
|
17
|
+
require 'ox'
|
18
|
+
|
19
|
+
opts = OptionParser.new
|
20
|
+
opts.on("-h", "--help", "Show this display") { puts opts; Process.exit!(0) }
|
21
|
+
files = opts.parse(ARGV)
|
22
|
+
|
23
|
+
class StartSax < ::Ox::Sax
|
24
|
+
attr_accessor :calls
|
25
|
+
|
26
|
+
def initialize()
|
27
|
+
@calls = []
|
28
|
+
end
|
29
|
+
|
30
|
+
def start_element(name, attrs)
|
31
|
+
@calls << [:start_element, name, attrs]
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
class AllSax < StartSax
|
36
|
+
def initialize()
|
37
|
+
super
|
38
|
+
end
|
39
|
+
|
40
|
+
def instruct(target, attrs)
|
41
|
+
@calls << [:instruct, target, attrs]
|
42
|
+
end
|
43
|
+
|
44
|
+
def doctype(value)
|
45
|
+
@calls << [:doctype, value]
|
46
|
+
end
|
47
|
+
|
48
|
+
def comment(value)
|
49
|
+
@calls << [:comment, value]
|
50
|
+
end
|
51
|
+
|
52
|
+
def cdata(value)
|
53
|
+
@calls << [:cdata, value]
|
54
|
+
end
|
55
|
+
|
56
|
+
def text(value)
|
57
|
+
@calls << [:text, value]
|
58
|
+
end
|
59
|
+
|
60
|
+
def end_element(name)
|
61
|
+
@calls << [:end_element, name]
|
62
|
+
end
|
63
|
+
|
64
|
+
def error(message, line, column)
|
65
|
+
@calls << [:error, message, line, column]
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
class Func < ::Test::Unit::TestCase
|
70
|
+
|
71
|
+
def test_sax_io_pipe
|
72
|
+
handler = AllSax.new()
|
73
|
+
input,w = IO.pipe
|
74
|
+
w << %{<top/>}
|
75
|
+
w.close
|
76
|
+
Ox.sax_parse(handler, input)
|
77
|
+
assert_equal(handler.calls,
|
78
|
+
[[:start_element, :top, nil],
|
79
|
+
[:end_element, :top]])
|
80
|
+
end
|
81
|
+
|
82
|
+
def test_sax_io_file
|
83
|
+
handler = AllSax.new()
|
84
|
+
input = IO.open(IO.sysopen('basic.xml'))
|
85
|
+
Ox.sax_parse(handler, input)
|
86
|
+
assert_equal(handler.calls,
|
87
|
+
[[:start_element, :top, nil],
|
88
|
+
[:end_element, :top]])
|
89
|
+
end
|
90
|
+
|
91
|
+
def parse_compare(xml, expected, handler_class=AllSax)
|
92
|
+
handler = handler_class.new()
|
93
|
+
input = StringIO.new(xml)
|
94
|
+
Ox.sax_parse(handler, input)
|
95
|
+
assert_equal(expected, handler.calls)
|
96
|
+
end
|
97
|
+
|
98
|
+
def test_sax_instruct_simple
|
99
|
+
parse_compare(%{<?xml?>}, [[:instruct, 'xml', nil]])
|
100
|
+
end
|
101
|
+
|
102
|
+
def test_sax_instruct_blank
|
103
|
+
parse_compare(%{<?xml?>}, [], StartSax)
|
104
|
+
end
|
105
|
+
|
106
|
+
def test_sax_instruct_attrs
|
107
|
+
parse_compare(%{<?xml version="1.0" encoding="UTF-8"?>},
|
108
|
+
[[:instruct, 'xml', {:version => '1.0', :encoding => 'UTF-8'}]])
|
109
|
+
end
|
110
|
+
|
111
|
+
def test_sax_instruct_loose
|
112
|
+
parse_compare(%{<? xml
|
113
|
+
version = "1.0"
|
114
|
+
encoding = "UTF-8" ?>},
|
115
|
+
[[:instruct, 'xml', {:version => '1.0', :encoding => 'UTF-8'}]])
|
116
|
+
end
|
117
|
+
|
118
|
+
def test_sax_element_simple
|
119
|
+
parse_compare(%{<top/>},
|
120
|
+
[[:start_element, :top, nil],
|
121
|
+
[:end_element, :top]])
|
122
|
+
end
|
123
|
+
|
124
|
+
def test_sax_element_attrs
|
125
|
+
parse_compare(%{<top x="57" y="42"/>},
|
126
|
+
[[:start_element, :top, {:x => '57', :y => '42'}],
|
127
|
+
[:end_element, :top]])
|
128
|
+
end
|
129
|
+
|
130
|
+
def test_sax_two_top
|
131
|
+
parse_compare(%{<top/><top/>},
|
132
|
+
[[:start_element, :top, nil],
|
133
|
+
[:end_element, :top],
|
134
|
+
[:error, "invalid format, multiple top level elements", 1, 9],
|
135
|
+
[:start_element, :top, nil],
|
136
|
+
[:end_element, :top]])
|
137
|
+
|
138
|
+
|
139
|
+
end
|
140
|
+
|
141
|
+
def test_sax_nested1
|
142
|
+
parse_compare(%{<?xml version="1.0"?>
|
143
|
+
<top>
|
144
|
+
<child>
|
145
|
+
<grandchild/>
|
146
|
+
</child>
|
147
|
+
</top>
|
148
|
+
},
|
149
|
+
[[:instruct, 'xml', {:version => '1.0'}],
|
150
|
+
[:start_element, :top, nil],
|
151
|
+
[:start_element, :child, nil],
|
152
|
+
[:start_element, :grandchild, nil],
|
153
|
+
[:end_element, :grandchild],
|
154
|
+
[:end_element, :child],
|
155
|
+
[:end_element, :top],
|
156
|
+
])
|
157
|
+
end
|
158
|
+
|
159
|
+
def test_sax_nested1_tight
|
160
|
+
parse_compare(%{<?xml version="1.0"?><top><child><grandchild/></child></top>},
|
161
|
+
[[:instruct, 'xml', {:version => '1.0'}],
|
162
|
+
[:start_element, :top, nil],
|
163
|
+
[:start_element, :child, nil],
|
164
|
+
[:start_element, :grandchild, nil],
|
165
|
+
[:end_element, :grandchild],
|
166
|
+
[:end_element, :child],
|
167
|
+
[:end_element, :top],
|
168
|
+
])
|
169
|
+
end
|
170
|
+
|
171
|
+
def test_sax_element_name_mismatch
|
172
|
+
parse_compare(%{<?xml version="1.0"?>
|
173
|
+
<top>
|
174
|
+
<child>
|
175
|
+
<grandchild/>
|
176
|
+
</parent>
|
177
|
+
</top>},
|
178
|
+
[[:instruct, 'xml', {:version => '1.0'}],
|
179
|
+
[:start_element, :top, nil],
|
180
|
+
[:start_element, :child, nil],
|
181
|
+
[:start_element, :grandchild, nil],
|
182
|
+
[:end_element, :grandchild],
|
183
|
+
[:error, "invalid format, element start and end names do not match", 5, 12]
|
184
|
+
])
|
185
|
+
end
|
186
|
+
|
187
|
+
def test_sax_nested
|
188
|
+
parse_compare(%{<?xml version="1.0"?>
|
189
|
+
<top>
|
190
|
+
<child>
|
191
|
+
<grandchild/>
|
192
|
+
</child>
|
193
|
+
<child>
|
194
|
+
<grandchild/>
|
195
|
+
<grandchild/>
|
196
|
+
</child>
|
197
|
+
</top>
|
198
|
+
},
|
199
|
+
[[:instruct, 'xml', {:version => '1.0'}],
|
200
|
+
[:start_element, :top, nil],
|
201
|
+
[:start_element, :child, nil],
|
202
|
+
[:start_element, :grandchild, nil],
|
203
|
+
[:end_element, :grandchild],
|
204
|
+
[:end_element, :child],
|
205
|
+
[:start_element, :child, nil],
|
206
|
+
[:start_element, :grandchild, nil],
|
207
|
+
[:end_element, :grandchild],
|
208
|
+
[:start_element, :grandchild, nil],
|
209
|
+
[:end_element, :grandchild],
|
210
|
+
[:end_element, :child],
|
211
|
+
[:end_element, :top],
|
212
|
+
])
|
213
|
+
end
|
214
|
+
def test_sax_element_no_term
|
215
|
+
parse_compare(%{
|
216
|
+
<top>
|
217
|
+
<child/>
|
218
|
+
},
|
219
|
+
[[:start_element, :top, nil],
|
220
|
+
[:start_element, :child, nil],
|
221
|
+
[:end_element, :child],
|
222
|
+
[:error, "invalid format, element not terminated", 4, 1]
|
223
|
+
])
|
224
|
+
end
|
225
|
+
|
226
|
+
def test_sax_text
|
227
|
+
parse_compare(%{<top>This is some text.</top>},
|
228
|
+
[[:start_element, :top, nil],
|
229
|
+
[:text, "This is some text."],
|
230
|
+
[:end_element, :top]
|
231
|
+
])
|
232
|
+
end
|
233
|
+
|
234
|
+
def test_sax_text_no_term
|
235
|
+
parse_compare(%{<top>This is some text.},
|
236
|
+
[[:start_element, :top, nil],
|
237
|
+
[:error, "invalid format, text terminated unexpectedly", 1, 24],
|
238
|
+
])
|
239
|
+
end
|
240
|
+
# TBD invalid characters in text
|
241
|
+
|
242
|
+
def test_sax_doctype
|
243
|
+
parse_compare(%{<?xml version="1.0"?>
|
244
|
+
<!DOCTYPE top PUBLIC "top.dtd">
|
245
|
+
<top/>
|
246
|
+
},
|
247
|
+
[[:instruct, 'xml', {:version => '1.0'}],
|
248
|
+
[:doctype, ' top PUBLIC "top.dtd"'],
|
249
|
+
[:start_element, :top, nil],
|
250
|
+
[:end_element, :top]])
|
251
|
+
end
|
252
|
+
|
253
|
+
def test_sax_doctype_bad_order
|
254
|
+
parse_compare(%{<?xml version="1.0"?>
|
255
|
+
<top/>
|
256
|
+
<!DOCTYPE top PUBLIC "top.dtd">
|
257
|
+
},
|
258
|
+
[[:instruct, 'xml', {:version => '1.0'}],
|
259
|
+
[:start_element, :top, nil],
|
260
|
+
[:end_element, :top],
|
261
|
+
[:error, "invalid format, DOCTYPE can not come after an element", 3, 11],
|
262
|
+
[:doctype, ' top PUBLIC "top.dtd"']])
|
263
|
+
end
|
264
|
+
|
265
|
+
def test_sax_instruct_bad_order
|
266
|
+
parse_compare(%{
|
267
|
+
<!DOCTYPE top PUBLIC "top.dtd">
|
268
|
+
<?xml version="1.0"?>
|
269
|
+
<top/>
|
270
|
+
},
|
271
|
+
[[:doctype, " top PUBLIC \"top.dtd\""],
|
272
|
+
[:error, "invalid format, instruction must come before elements", 3, 3],
|
273
|
+
[:instruct, "xml", {:version => "1.0"}],
|
274
|
+
[:start_element, :top, nil],
|
275
|
+
[:end_element, :top]])
|
276
|
+
end
|
277
|
+
|
278
|
+
def test_sax_comment
|
279
|
+
parse_compare(%{<?xml version="1.0"?>
|
280
|
+
<!--First comment.-->
|
281
|
+
<top>Before<!--Nested comment.-->After</top>
|
282
|
+
},
|
283
|
+
[[:instruct, 'xml', {:version => '1.0'}],
|
284
|
+
[:comment, 'First comment.'],
|
285
|
+
[:start_element, :top, nil],
|
286
|
+
[:text, 'Before'],
|
287
|
+
[:comment, 'Nested comment.'],
|
288
|
+
[:text, 'After'],
|
289
|
+
[:end_element, :top]])
|
290
|
+
end
|
291
|
+
|
292
|
+
def test_sax_comment_no_term
|
293
|
+
parse_compare(%{<?xml version="1.0"?>
|
294
|
+
<!--First comment.--
|
295
|
+
<top/>
|
296
|
+
},
|
297
|
+
[[:instruct, 'xml', {:version => '1.0'}],
|
298
|
+
[:error, "invalid format, comment terminated unexpectedly", 3, 1], # continue on
|
299
|
+
[:comment, 'First comment.'],
|
300
|
+
[:start_element, :top, nil],
|
301
|
+
[:end_element, :top]])
|
302
|
+
end
|
303
|
+
|
304
|
+
def test_sax_cdata
|
305
|
+
parse_compare(%{<?xml version="1.0"?>
|
306
|
+
<top>
|
307
|
+
<![CDATA[This is CDATA.]]>
|
308
|
+
</top>
|
309
|
+
},
|
310
|
+
[[:instruct, 'xml', {:version => '1.0'}],
|
311
|
+
[:start_element, :top, nil],
|
312
|
+
[:cdata, 'This is CDATA.'],
|
313
|
+
[:end_element, :top]])
|
314
|
+
end
|
315
|
+
|
316
|
+
def test_sax_cdata_no_term
|
317
|
+
parse_compare(%{<?xml version="1.0"?>
|
318
|
+
<top>
|
319
|
+
<![CDATA[This is CDATA.]]
|
320
|
+
</top>
|
321
|
+
},
|
322
|
+
[[:instruct, 'xml', {:version => '1.0'}],
|
323
|
+
[:start_element, :top, nil],
|
324
|
+
[:error, "invalid format, cdata terminated unexpectedly", 5, 1]])
|
325
|
+
end
|
326
|
+
|
327
|
+
|
328
|
+
def test_sax_mixed
|
329
|
+
parse_compare(%{<?xml version="1.0"?>
|
330
|
+
<?ox version="1.0" mode="object" circular="no" xsd_date="no"?>
|
331
|
+
<!DOCTYPE table PUBLIC "-//ox//DTD TABLE 1.0//EN" "http://www.ohler.com/DTDs/TestTable-1.0.dtd">
|
332
|
+
<table>
|
333
|
+
<row id="00004">
|
334
|
+
<cell id="A" type="Fixnum">1234</cell>
|
335
|
+
<cell id="B" type="String">A string.</cell>
|
336
|
+
<cell id="C" type="String">This is a longer string that stretches over a larger number of characters.</cell>
|
337
|
+
<cell id="D" type="Float">-12.345</cell>
|
338
|
+
<cell id="E" type="Date">2011-09-18 23:07:26 +0900</cell>
|
339
|
+
<cell id="F" type="Image"><![CDATA[xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00]]></cell>
|
340
|
+
</row>
|
341
|
+
</table>
|
342
|
+
},
|
343
|
+
[[:instruct, 'xml', {:version => '1.0'}],
|
344
|
+
[:instruct, "ox", {:version=>"1.0", :mode=>"object", :circular=>"no", :xsd_date=>"no"}],
|
345
|
+
[:doctype, " table PUBLIC \"-//ox//DTD TABLE 1.0//EN\" \"http://www.ohler.com/DTDs/TestTable-1.0.dtd\""],
|
346
|
+
[:start_element, :table, nil],
|
347
|
+
[:start_element, :row, {:id=>"00004"}],
|
348
|
+
[:start_element, :cell, {:id=>"A", :type=>"Fixnum"}],
|
349
|
+
[:text, "1234"],
|
350
|
+
[:end_element, :cell],
|
351
|
+
[:start_element, :cell, {:id=>"B", :type=>"String"}],
|
352
|
+
[:text, "A string."],
|
353
|
+
[:end_element, :cell],
|
354
|
+
[:start_element, :cell, {:id=>"C", :type=>"String"}],
|
355
|
+
[:text, "This is a longer string that stretches over a larger number of characters."],
|
356
|
+
[:end_element, :cell],
|
357
|
+
[:start_element, :cell, {:id=>"D", :type=>"Float"}],
|
358
|
+
[:text, "-12.345"],
|
359
|
+
[:end_element, :cell],
|
360
|
+
[:start_element, :cell, {:id=>"E", :type=>"Date"}],
|
361
|
+
[:text, "2011-09-18 23:07:26 +0900"],
|
362
|
+
[:end_element, :cell],
|
363
|
+
[:start_element, :cell, {:id=>"F", :type=>"Image"}],
|
364
|
+
[:cdata, "xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00"],
|
365
|
+
[:end_element, :cell],
|
366
|
+
[:end_element, :row],
|
367
|
+
[:end_element, :table]])
|
368
|
+
end
|
369
|
+
|
370
|
+
def test_sax_encoding
|
371
|
+
parse_compare(%{<?xml version="1.0" encoding="UTF-8"?>
|
372
|
+
<top>ピーター</top>
|
373
|
+
},
|
374
|
+
[[:instruct, 'xml', {:version => '1.0', :encoding => 'UTF-8'}],
|
375
|
+
[:start_element, :top, nil],
|
376
|
+
[:text, 'ピーター'],
|
377
|
+
[:end_element, :top]])
|
378
|
+
end
|
379
|
+
|
380
|
+
end
|
381
|
+
|