ox 1.3.3 → 1.3.4
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of ox might be problematic. Click here for more details.
- data/README.md +2 -5
- data/ext/ox/ox.c +2 -0
- data/ext/ox/ox.h +1 -0
- data/ext/ox/sax.c +45 -3
- data/lib/ox/version.rb +1 -1
- data/test/func.rb +12 -15
- data/test/parse_cmp.rb +261 -0
- data/test/sax_test.rb +14 -18
- metadata +4 -3
data/README.md
CHANGED
@@ -26,12 +26,9 @@ A fast XML parser and Object marshaller as a Ruby gem.
|
|
26
26
|
|
27
27
|
## <a name="release">Release Notes</a>
|
28
28
|
|
29
|
-
### Release 1.3.3
|
30
|
-
|
31
|
-
- Added an option to the SAX parser to convert special characters.
|
32
|
-
|
33
|
-
- The default options encoding is now used as the default for SAX parsing.
|
29
|
+
### Release 1.3.4
|
34
30
|
|
31
|
+
- Made Ox SAX compatible with Ruby 1.8.7 when readpartial is not implemented on IO.
|
35
32
|
|
36
33
|
## <a name="description">Description</a>
|
37
34
|
|
data/ext/ox/ox.c
CHANGED
@@ -64,6 +64,7 @@ ID local_id;
|
|
64
64
|
ID nodes_id;
|
65
65
|
ID num_id;
|
66
66
|
ID parse_id;
|
67
|
+
ID read_id;
|
67
68
|
ID readpartial_id;
|
68
69
|
ID start_element_id;
|
69
70
|
ID text_id;
|
@@ -652,6 +653,7 @@ void Init_ox() {
|
|
652
653
|
num_id = rb_intern("@num");
|
653
654
|
parse_id = rb_intern("parse");
|
654
655
|
readpartial_id = rb_intern("readpartial");
|
656
|
+
read_id = rb_intern("read");
|
655
657
|
start_element_id = rb_intern("start_element");
|
656
658
|
text_id = rb_intern("text");
|
657
659
|
to_c_id = rb_intern("to_c");
|
data/ext/ox/ox.h
CHANGED
data/ext/ox/sax.c
CHANGED
@@ -88,8 +88,10 @@ static int read_quoted_value(SaxDrive dr);
|
|
88
88
|
static int collapse_special(char *str);
|
89
89
|
|
90
90
|
static VALUE io_cb(VALUE rdr);
|
91
|
+
static VALUE partial_io_cb(VALUE rdr);
|
91
92
|
static int read_from_io(SaxDrive dr);
|
92
93
|
static int read_from_fd(SaxDrive dr);
|
94
|
+
static int read_from_io_partial(SaxDrive dr);
|
93
95
|
|
94
96
|
static inline char
|
95
97
|
sax_drive_get(SaxDrive dr) {
|
@@ -183,6 +185,16 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert) {
|
|
183
185
|
if (rb_respond_to(io, readpartial_id)) {
|
184
186
|
VALUE rfd;
|
185
187
|
|
188
|
+
if (rb_respond_to(io, rb_intern("fileno")) && Qnil != (rfd = rb_funcall(io, rb_intern("fileno"), 0))) {
|
189
|
+
dr->read_func = read_from_fd;
|
190
|
+
dr->fd = FIX2INT(rfd);
|
191
|
+
} else {
|
192
|
+
dr->read_func = read_from_io_partial;
|
193
|
+
dr->io = io;
|
194
|
+
}
|
195
|
+
} else if (rb_respond_to(io, read_id)) {
|
196
|
+
VALUE rfd;
|
197
|
+
|
186
198
|
if (rb_respond_to(io, rb_intern("fileno")) && Qnil != (rfd = rb_funcall(io, rb_intern("fileno"), 0))) {
|
187
199
|
dr->read_func = read_from_fd;
|
188
200
|
dr->fd = FIX2INT(rfd);
|
@@ -191,7 +203,7 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert) {
|
|
191
203
|
dr->io = io;
|
192
204
|
}
|
193
205
|
} else {
|
194
|
-
rb_raise(rb_eArgError, "sax_parser io argument must respond to readpartial().\n");
|
206
|
+
rb_raise(rb_eArgError, "sax_parser io argument must respond to readpartial() or read().\n");
|
195
207
|
}
|
196
208
|
dr->buf = dr->base_buf;
|
197
209
|
*dr->buf = '\0';
|
@@ -730,6 +742,35 @@ read_quoted_value(SaxDrive dr) {
|
|
730
742
|
return 0;
|
731
743
|
}
|
732
744
|
|
745
|
+
static int
|
746
|
+
read_from_io_partial(SaxDrive dr) {
|
747
|
+
int ex = 0;
|
748
|
+
|
749
|
+
rb_protect(partial_io_cb, (VALUE)dr, &ex);
|
750
|
+
// printf("*** io_cb exception = %d\n", ex);
|
751
|
+
// An error code of 6 is always returned no matter what kind of Exception is raised.
|
752
|
+
return ex;
|
753
|
+
}
|
754
|
+
|
755
|
+
static VALUE
|
756
|
+
partial_io_cb(VALUE rdr) {
|
757
|
+
SaxDrive dr = (SaxDrive)rdr;
|
758
|
+
VALUE args[1];
|
759
|
+
VALUE rstr;
|
760
|
+
char *str;
|
761
|
+
size_t cnt;
|
762
|
+
|
763
|
+
args[0] = ULONG2NUM(dr->buf_end - dr->cur);
|
764
|
+
rstr = rb_funcall2(dr->io, readpartial_id, 1, args);
|
765
|
+
str = StringValuePtr(rstr);
|
766
|
+
cnt = strlen(str);
|
767
|
+
//printf("*** read %lu bytes, str: '%s'\n", cnt, str);
|
768
|
+
strcpy(dr->cur, str);
|
769
|
+
dr->read_end = dr->cur + cnt;
|
770
|
+
|
771
|
+
return Qnil;
|
772
|
+
}
|
773
|
+
|
733
774
|
static int
|
734
775
|
read_from_io(SaxDrive dr) {
|
735
776
|
int ex = 0;
|
@@ -748,8 +789,9 @@ io_cb(VALUE rdr) {
|
|
748
789
|
char *str;
|
749
790
|
size_t cnt;
|
750
791
|
|
751
|
-
args[0] =
|
752
|
-
|
792
|
+
args[0] = ULONG2NUM(dr->buf_end - dr->cur);
|
793
|
+
//args[0] = SIZET2NUM(dr->buf_end - dr->cur);
|
794
|
+
rstr = rb_funcall2(dr->io, read_id, 1, args);
|
753
795
|
str = StringValuePtr(rstr);
|
754
796
|
cnt = strlen(str);
|
755
797
|
//printf("*** read %lu bytes, str: '%s'\n", cnt, str);
|
data/lib/ox/version.rb
CHANGED
data/test/func.rb
CHANGED
@@ -1,15 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby -wW1
|
2
2
|
# encoding: UTF-8
|
3
3
|
|
4
|
-
$: <<
|
5
|
-
$: <<
|
6
|
-
|
7
|
-
if __FILE__ == $0
|
8
|
-
while (i = ARGV.index('-I'))
|
9
|
-
x,path = ARGV.slice!(i, 2)
|
10
|
-
$: << path
|
11
|
-
end
|
12
|
-
end
|
4
|
+
$: << File.join(File.dirname(__FILE__), "../lib")
|
5
|
+
$: << File.join(File.dirname(__FILE__), "../ext")
|
13
6
|
|
14
7
|
require 'test/unit'
|
15
8
|
require 'optparse'
|
@@ -338,22 +331,26 @@ class Func < ::Test::Unit::TestCase
|
|
338
331
|
"x".foo
|
339
332
|
rescue Exception => e
|
340
333
|
xml = Ox.dump(e, :effort => :tolerant)
|
341
|
-
o = Ox.load(xml, mode
|
334
|
+
o = Ox.load(xml, :mode => :object)
|
342
335
|
xml2 = Ox.dump(o, :effort => :tolerant)
|
343
336
|
assert_equal(xml, xml2)
|
344
337
|
end
|
345
338
|
end
|
346
339
|
|
347
340
|
def test_mutex
|
348
|
-
|
349
|
-
|
350
|
-
|
341
|
+
if defined?(Mutex)
|
342
|
+
# Mutex cannot be serialized, but it should not raise an exception.
|
343
|
+
xml = Ox.dump(Mutex.new, :indent => 2, :effort => :tolerant)
|
344
|
+
assert_equal(%{<z/>
|
351
345
|
}, xml)
|
352
|
-
|
353
|
-
|
346
|
+
xml = Ox.dump(Bag.new(:@x => Mutex.new), :indent => 2, :effort => :tolerant)
|
347
|
+
assert_equal(%{<o c="Bag">
|
354
348
|
<z a="@x"/>
|
355
349
|
</o>
|
356
350
|
}, xml)
|
351
|
+
else
|
352
|
+
assert(true)
|
353
|
+
end
|
357
354
|
end
|
358
355
|
|
359
356
|
def test_encoding
|
data/test/parse_cmp.rb
ADDED
@@ -0,0 +1,261 @@
|
|
1
|
+
#!/usr/bin/env ruby -wW1
|
2
|
+
|
3
|
+
$: << '../lib'
|
4
|
+
$: << '../ext'
|
5
|
+
|
6
|
+
require 'optparse'
|
7
|
+
require 'stringio'
|
8
|
+
require 'ox'
|
9
|
+
|
10
|
+
$verbose = 0
|
11
|
+
$iter = 100
|
12
|
+
|
13
|
+
opts = OptionParser.new
|
14
|
+
opts.on("-v", "increase verbosity") { $verbose += 1 }
|
15
|
+
opts.on("-i", "--iterations [Int]", Integer, "iterations") { |i| $iter = i }
|
16
|
+
opts.on("-h", "--help", "Show this display") { puts opts; Process.exit!(0) }
|
17
|
+
files = opts.parse(ARGV)
|
18
|
+
|
19
|
+
### XML conversion to Hash using in memory Ox parsing ###
|
20
|
+
|
21
|
+
def node_to_dict(element)
|
22
|
+
dict = Hash.new
|
23
|
+
key = nil
|
24
|
+
element.nodes.each do |n|
|
25
|
+
raise "A dict can only contain elements." unless n.is_a?(::Ox::Element)
|
26
|
+
if key.nil?
|
27
|
+
raise "Expected a key, not a #{n.name}." unless 'key' == n.name
|
28
|
+
key = first_text(n)
|
29
|
+
else
|
30
|
+
dict[key] = node_to_value(n)
|
31
|
+
key = nil
|
32
|
+
end
|
33
|
+
end
|
34
|
+
dict
|
35
|
+
end
|
36
|
+
|
37
|
+
def node_to_array(element)
|
38
|
+
a = Array.new
|
39
|
+
element.nodes.each do |n|
|
40
|
+
a.push(node_to_value(n))
|
41
|
+
end
|
42
|
+
a
|
43
|
+
end
|
44
|
+
|
45
|
+
def node_to_value(node)
|
46
|
+
raise "A dict can only contain elements." unless node.is_a?(::Ox::Element)
|
47
|
+
case node.name
|
48
|
+
when 'key'
|
49
|
+
raise "Expected a value, not a key."
|
50
|
+
when 'string'
|
51
|
+
value = first_text(node)
|
52
|
+
when 'dict'
|
53
|
+
value = node_to_dict(node)
|
54
|
+
when 'array'
|
55
|
+
value = node_to_array(node)
|
56
|
+
when 'integer'
|
57
|
+
value = first_text(node).to_i
|
58
|
+
when 'real'
|
59
|
+
value = first_text(node).to_f
|
60
|
+
when 'true'
|
61
|
+
value = true
|
62
|
+
when 'false'
|
63
|
+
value = false
|
64
|
+
else
|
65
|
+
raise "#{node.name} is not a know element type."
|
66
|
+
end
|
67
|
+
value
|
68
|
+
end
|
69
|
+
|
70
|
+
def first_text(node)
|
71
|
+
node.nodes.each do |n|
|
72
|
+
return n if n.is_a?(String)
|
73
|
+
end
|
74
|
+
nil
|
75
|
+
end
|
76
|
+
|
77
|
+
def parse_gen(xml)
|
78
|
+
doc = Ox.parse(xml)
|
79
|
+
plist = doc.root
|
80
|
+
dict = nil
|
81
|
+
plist.nodes.each do |n|
|
82
|
+
if n.is_a?(::Ox::Element)
|
83
|
+
dict = node_to_dict(n)
|
84
|
+
break
|
85
|
+
end
|
86
|
+
end
|
87
|
+
dict
|
88
|
+
end
|
89
|
+
|
90
|
+
### XML conversion to Hash using Ox SAX parser ###
|
91
|
+
|
92
|
+
class Handler
|
93
|
+
def initialize()
|
94
|
+
@key = nil
|
95
|
+
@type = nil
|
96
|
+
@plist = nil
|
97
|
+
@stack = []
|
98
|
+
end
|
99
|
+
|
100
|
+
def text(value)
|
101
|
+
last = @stack.last
|
102
|
+
if last.is_a?(Hash) and @key.nil?
|
103
|
+
raise "Expected a key, not #{@type} with a value of #{value}." unless :key == @type
|
104
|
+
@key = value
|
105
|
+
else
|
106
|
+
append(value)
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
def start_element(name)
|
111
|
+
if :dict == name
|
112
|
+
dict = Hash.new
|
113
|
+
append(dict)
|
114
|
+
@stack.push(dict)
|
115
|
+
elsif :array == name
|
116
|
+
a = Array.new
|
117
|
+
append(a)
|
118
|
+
@stack.push(a)
|
119
|
+
elsif :true == name
|
120
|
+
append(true)
|
121
|
+
elsif :false == name
|
122
|
+
append(false)
|
123
|
+
else
|
124
|
+
@type = name
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
def end_element(name)
|
129
|
+
@stack.pop if :dict == name or :array == name
|
130
|
+
end
|
131
|
+
|
132
|
+
def plist
|
133
|
+
@plist
|
134
|
+
end
|
135
|
+
|
136
|
+
def append(value)
|
137
|
+
unless value.is_a?(Array) or value.is_a?(Hash)
|
138
|
+
case @type
|
139
|
+
when :string
|
140
|
+
# ignore
|
141
|
+
when :key
|
142
|
+
# ignore
|
143
|
+
when :integer
|
144
|
+
value = value.to_i
|
145
|
+
when :real
|
146
|
+
value = value.to_f
|
147
|
+
end
|
148
|
+
end
|
149
|
+
last = @stack.last
|
150
|
+
if last.is_a?(Hash)
|
151
|
+
raise "Expected a key, not with a value of #{value}." if @key.nil?
|
152
|
+
last[@key] = value
|
153
|
+
@key = nil
|
154
|
+
elsif last.is_a?(Array)
|
155
|
+
last.push(value)
|
156
|
+
elsif last.nil?
|
157
|
+
@plist = value
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
161
|
+
end
|
162
|
+
|
163
|
+
def parse_sax(xml)
|
164
|
+
io = StringIO.new(xml)
|
165
|
+
start = Time.now
|
166
|
+
handler = Handler.new()
|
167
|
+
Ox.sax_parse(handler, io)
|
168
|
+
handler.plist
|
169
|
+
end
|
170
|
+
|
171
|
+
### XML conversion to Hash using Ox Object parsing with gsub! replacements ###
|
172
|
+
|
173
|
+
def convert_parse_obj(xml)
|
174
|
+
xml = plist_to_obj_xml(xml)
|
175
|
+
::Ox.load(xml, :mode => :object)
|
176
|
+
end
|
177
|
+
|
178
|
+
### XML conversion to Hash using Ox Object parsing after gsub! replacements ###
|
179
|
+
|
180
|
+
def parse_obj(xml)
|
181
|
+
::Ox.load(xml, :mode => :object)
|
182
|
+
end
|
183
|
+
|
184
|
+
def plist_to_obj_xml(xml)
|
185
|
+
xml = xml.gsub(%{<plist version="1.0">
|
186
|
+
}, '')
|
187
|
+
xml.gsub!(%{
|
188
|
+
</plist>}, '')
|
189
|
+
{ '<dict>' => '<h>',
|
190
|
+
'</dict>' => '</h>',
|
191
|
+
'<dict/>' => '<h/>',
|
192
|
+
'<array>' => '<a>',
|
193
|
+
'</array>' => '</a>',
|
194
|
+
'<array/>' => '<a/>',
|
195
|
+
'<string>' => '<s>',
|
196
|
+
'</string>' => '</s>',
|
197
|
+
'<string/>' => '<s/>',
|
198
|
+
'<key>' => '<s>',
|
199
|
+
'</key>' => '</s>',
|
200
|
+
'<integer>' => '<i>',
|
201
|
+
'</integer>' => '</i>',
|
202
|
+
'<integer/>' => '<i/>',
|
203
|
+
'<real>' => '<f>',
|
204
|
+
'</real>' => '</f>',
|
205
|
+
'<real/>' => '<f/>',
|
206
|
+
'<true/>' => '<y/>',
|
207
|
+
'<false/>' => '<n/>',
|
208
|
+
}.each do |pat,rep|
|
209
|
+
xml.gsub!(pat, rep)
|
210
|
+
end
|
211
|
+
xml
|
212
|
+
end
|
213
|
+
|
214
|
+
files.each do |filename|
|
215
|
+
xml = File.read(filename)
|
216
|
+
|
217
|
+
if 0 < $verbose
|
218
|
+
d1 = parse_gen(xml)
|
219
|
+
d2 = parse_sax(xml)
|
220
|
+
d3 = convert_parse_obj(xml)
|
221
|
+
puts "--- It is #{d1 == d2 and d2 == d3} that all parsers yield the same Hash. ---"
|
222
|
+
end
|
223
|
+
|
224
|
+
start = Time.now
|
225
|
+
$iter.times do
|
226
|
+
parse_gen(xml)
|
227
|
+
end
|
228
|
+
gen_time = Time.now - start
|
229
|
+
|
230
|
+
start = Time.now
|
231
|
+
$iter.times do
|
232
|
+
parse_sax(xml)
|
233
|
+
end
|
234
|
+
sax_time = Time.now - start
|
235
|
+
|
236
|
+
start = Time.now
|
237
|
+
$iter.times do
|
238
|
+
convert_parse_obj(xml)
|
239
|
+
end
|
240
|
+
conv_obj_time = Time.now - start
|
241
|
+
|
242
|
+
xml = plist_to_obj_xml(xml)
|
243
|
+
start = Time.now
|
244
|
+
$iter.times do
|
245
|
+
parse_obj(xml)
|
246
|
+
end
|
247
|
+
obj_time = Time.now - start
|
248
|
+
|
249
|
+
puts "In memory parsing and conversion took #{gen_time} for #{$iter} iterations."
|
250
|
+
puts "SAX parsing and conversion took #{sax_time} for #{$iter} iterations."
|
251
|
+
puts "XML gsub Object parsing and conversion took #{conv_obj_time} for #{$iter} iterations."
|
252
|
+
puts "Object parsing and conversion took #{obj_time} for #{$iter} iterations."
|
253
|
+
end
|
254
|
+
|
255
|
+
# Results for a run:
|
256
|
+
#
|
257
|
+
# > parse_cmp.rb Sample.graffle -i 1000
|
258
|
+
# In memory parsing and conversion took 4.135701 for 1000 iterations.
|
259
|
+
# SAX parsing and conversion took 3.731695 for 1000 iterations.
|
260
|
+
# XML gsub Object parsing and conversion took 3.292397 for 1000 iterations.
|
261
|
+
# Object parsing and conversion took 0.808877 for 1000 iterations.
|
data/test/sax_test.rb
CHANGED
@@ -1,15 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby -wW1
|
2
2
|
# encoding: UTF-8
|
3
3
|
|
4
|
-
$: <<
|
5
|
-
$: <<
|
6
|
-
|
7
|
-
if __FILE__ == $0
|
8
|
-
while (i = ARGV.index('-I'))
|
9
|
-
x,path = ARGV.slice!(i, 2)
|
10
|
-
$: << path
|
11
|
-
end
|
12
|
-
end
|
4
|
+
$: << File.join(File.dirname(__FILE__), "../lib")
|
5
|
+
$: << File.join(File.dirname(__FILE__), "../ext")
|
13
6
|
|
14
7
|
require 'stringio'
|
15
8
|
require 'test/unit'
|
@@ -85,7 +78,7 @@ class Func < ::Test::Unit::TestCase
|
|
85
78
|
|
86
79
|
def test_sax_io_file
|
87
80
|
handler = AllSax.new()
|
88
|
-
input = IO.open(IO.sysopen('basic.xml'))
|
81
|
+
input = IO.open(IO.sysopen(File.join(File.dirname(__FILE__), 'basic.xml')))
|
89
82
|
Ox.sax_parse(handler, input)
|
90
83
|
assert_equal(handler.calls,
|
91
84
|
[[:start_element, :top],
|
@@ -420,16 +413,19 @@ encoding = "UTF-8" ?>},
|
|
420
413
|
end
|
421
414
|
|
422
415
|
def test_sax_encoding
|
423
|
-
|
416
|
+
if RUBY_VERSION.start_with?('1.8')
|
417
|
+
assert(true)
|
418
|
+
else
|
419
|
+
parse_compare(%{<?xml version="1.0" encoding="UTF-8"?>
|
424
420
|
<top>ピーター</top>
|
425
421
|
},
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
422
|
+
[[:instruct, "xml"],
|
423
|
+
[:attr, :version, "1.0"],
|
424
|
+
[:attr, :encoding, "UTF-8"],
|
425
|
+
[:start_element, :top],
|
426
|
+
[:text, 'ピーター'],
|
427
|
+
[:end_element, :top]])
|
428
|
+
end
|
432
429
|
end
|
433
430
|
|
434
431
|
end
|
435
|
-
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 1
|
7
7
|
- 3
|
8
|
-
- 3
|
9
|
-
version: 1.3.3
|
8
|
+
- 4
|
9
|
+
version: 1.3.4
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Peter Ohler
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-09-
|
17
|
+
date: 2011-09-26 00:00:00 +09:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|
@@ -75,6 +75,7 @@ files:
|
|
75
75
|
- test/ox/rect.rb
|
76
76
|
- test/ox/shape.rb
|
77
77
|
- test/ox/text.rb
|
78
|
+
- test/parse_cmp.rb
|
78
79
|
- test/perf_gen.rb
|
79
80
|
- test/perf_mars.rb
|
80
81
|
- test/perf_obj.rb
|