ox 1.3.1 → 1.3.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of ox might be problematic. Click here for more details.
- data/README.md +31 -26
- data/ext/ox/ox.c +2 -0
- data/ext/ox/ox.h +1 -0
- data/ext/ox/sax.c +31 -33
- data/lib/ox/sax.rb +8 -4
- data/lib/ox/version.rb +1 -1
- data/test/perf_gen.rb +68 -24
- data/test/perf_sax.rb +8 -7
- data/test/sax_test.rb +103 -62
- metadata +3 -3
data/README.md
CHANGED
@@ -18,12 +18,17 @@ A fast XML parser and Object marshaller as a Ruby gem.
|
|
18
18
|
|
19
19
|
[![Build Status](http://travis-ci.org/ohler55/ox.png)](http://travis-ci.org/ohler55/ox)
|
20
20
|
|
21
|
+
## <a name="links">Links of Interest</a>
|
22
|
+
|
23
|
+
[Ruby XML Gem Comparison](http://www.ohler.com/software/thoughts/Blog/Entries/2011/9/21_XML_with_Ruby.html) for a performance comparison between Ox, Nokogiri, and LibXML.
|
24
|
+
|
25
|
+
[Fast Ruby XML Serialization](http://www.ohler.com/software/thoughts/Blog/Entries/2011/9/20_Ruby_Object_XML_Serialization.html) to see how Ox can be used as a faster replacement for Marshal.
|
26
|
+
|
21
27
|
## <a name="release">Release Notes</a>
|
22
28
|
|
23
|
-
### Release 1.3.
|
29
|
+
### Release 1.3.2
|
24
30
|
|
25
|
-
-
|
26
|
-
- added SAX parser, 30+ times faster than Nokogiri and 10+ times faster than LibXML
|
31
|
+
- Changed SAX parser API for element and instruction attributes
|
27
32
|
|
28
33
|
## <a name="description">Description</a>
|
29
34
|
|
@@ -57,8 +62,8 @@ files may result in slightly different times.
|
|
57
62
|
As an Object serializer Ox is up to 6 times faster than the standard Ruby
|
58
63
|
Marshal.dump() and up to 3 times faster than Marshal.load().
|
59
64
|
|
60
|
-
The SAX like stream parser is
|
61
|
-
|
65
|
+
The SAX-like stream parser is 40 times faster than Nokogiri and more than 13
|
66
|
+
times faster than LibXML when validating a file with minimal Ruby
|
62
67
|
callbacks. Unlike Nokogiri and LibXML, Ox can be tuned to use only the SAX
|
63
68
|
callbacks that are of interest to the caller. (See the perf_sax.rb file for an
|
64
69
|
example.)
|
@@ -120,27 +125,27 @@ necessary.
|
|
120
125
|
|
121
126
|
The type indicator map is:
|
122
127
|
|
123
|
-
-
|
124
|
-
-
|
125
|
-
-
|
126
|
-
-
|
127
|
-
-
|
128
|
-
-
|
129
|
-
-
|
130
|
-
-
|
131
|
-
-
|
132
|
-
-
|
133
|
-
-
|
134
|
-
-
|
135
|
-
-
|
136
|
-
-
|
137
|
-
-
|
138
|
-
-
|
139
|
-
-
|
140
|
-
-
|
141
|
-
-
|
142
|
-
-
|
143
|
-
-
|
128
|
+
- **a** => Array
|
129
|
+
- **b** => Base64
|
130
|
+
- **c** => Class
|
131
|
+
- **f** => Float
|
132
|
+
- **g** => Regexp
|
133
|
+
- **h** => Hash
|
134
|
+
- **i** => Fixnum
|
135
|
+
- **j** => Bignum
|
136
|
+
- **l** => Rational
|
137
|
+
- **m** => Symbol
|
138
|
+
- **n** => FalseClass
|
139
|
+
- **o** => Object
|
140
|
+
- **p** => Ref
|
141
|
+
- **r** => Range
|
142
|
+
- **s** => String
|
143
|
+
- **t** => Time
|
144
|
+
- **u** => Struct
|
145
|
+
- **v** => Complex
|
146
|
+
- **x** => Raw
|
147
|
+
- **y** => TrueClass
|
148
|
+
- **z** => NilClass
|
144
149
|
|
145
150
|
If the type is an Object, type 'o' then an attribute named 'c' should be set
|
146
151
|
with the full Class name including the Module names. If the XML element
|
data/ext/ox/ox.c
CHANGED
@@ -46,6 +46,7 @@ void Init_ox();
|
|
46
46
|
VALUE Ox = Qnil;
|
47
47
|
|
48
48
|
ID at_id;
|
49
|
+
ID attr_id;
|
49
50
|
ID attributes_id;
|
50
51
|
ID beg_id;
|
51
52
|
ID cdata_id;
|
@@ -616,6 +617,7 @@ void Init_ox() {
|
|
616
617
|
|
617
618
|
rb_require("time");
|
618
619
|
at_id = rb_intern("at");
|
620
|
+
attr_id = rb_intern("attr");
|
619
621
|
attributes_id = rb_intern("@attributes");
|
620
622
|
beg_id = rb_intern("@beg");
|
621
623
|
cdata_id = rb_intern("cdata");
|
data/ext/ox/ox.h
CHANGED
data/ext/ox/sax.c
CHANGED
@@ -56,6 +56,7 @@ typedef struct _SaxDrive {
|
|
56
56
|
VALUE io;
|
57
57
|
};
|
58
58
|
int has_instruct;
|
59
|
+
int has_attr;
|
59
60
|
int has_doctype;
|
60
61
|
int has_comment;
|
61
62
|
int has_cdata;
|
@@ -80,7 +81,7 @@ static int read_cdata(SaxDrive dr);
|
|
80
81
|
static int read_comment(SaxDrive dr);
|
81
82
|
static int read_element(SaxDrive dr);
|
82
83
|
static int read_text(SaxDrive dr);
|
83
|
-
static int read_attrs(SaxDrive dr,
|
84
|
+
static int read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml);
|
84
85
|
static char read_name_token(SaxDrive dr);
|
85
86
|
static int read_quoted_value(SaxDrive dr);
|
86
87
|
|
@@ -162,6 +163,7 @@ ox_sax_parse(VALUE handler, VALUE io) {
|
|
162
163
|
#if 0
|
163
164
|
printf("*** sax_parse with these flags\n");
|
164
165
|
printf(" has_instruct = %s\n", dr.has_instruct ? "true" : "false");
|
166
|
+
printf(" has_attr = %s\n", dr.has_attr ? "true" : "false");
|
165
167
|
printf(" has_doctype = %s\n", dr.has_doctype ? "true" : "false");
|
166
168
|
printf(" has_comment = %s\n", dr.has_comment ? "true" : "false");
|
167
169
|
printf(" has_cdata = %s\n", dr.has_cdata ? "true" : "false");
|
@@ -199,6 +201,7 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io) {
|
|
199
201
|
dr->col = 0;
|
200
202
|
dr->handler = handler;
|
201
203
|
dr->has_instruct = rb_respond_to(handler, instruct_id);
|
204
|
+
dr->has_attr = rb_respond_to(handler, attr_id);
|
202
205
|
dr->has_doctype = rb_respond_to(handler, doctype_id);
|
203
206
|
dr->has_comment = rb_respond_to(handler, comment_id);
|
204
207
|
dr->has_cdata = rb_respond_to(handler, cdata_id);
|
@@ -373,17 +376,18 @@ read_children(SaxDrive dr, int first) {
|
|
373
376
|
*/
|
374
377
|
static int
|
375
378
|
read_instruction(SaxDrive dr) {
|
376
|
-
VALUE target = Qnil;
|
377
|
-
VALUE attrs = Qnil;
|
378
379
|
char c;
|
379
380
|
|
380
381
|
if ('\0' == (c = read_name_token(dr))) {
|
381
382
|
return -1;
|
382
383
|
}
|
383
384
|
if (dr->has_instruct) {
|
384
|
-
|
385
|
+
VALUE args[1];
|
386
|
+
|
387
|
+
args[0] = rb_str_new2(dr->str);
|
388
|
+
rb_funcall2(dr->handler, instruct_id, 1, args);
|
385
389
|
}
|
386
|
-
if (0 != read_attrs(dr,
|
390
|
+
if (0 != read_attrs(dr, c, '?', '?', (0 == strcmp("xml", dr->str)))) {
|
387
391
|
return -1;
|
388
392
|
}
|
389
393
|
c = next_non_white(dr);
|
@@ -391,13 +395,6 @@ read_instruction(SaxDrive dr) {
|
|
391
395
|
sax_drive_error(dr, "invalid format, instruction not terminated", 1);
|
392
396
|
return -1;
|
393
397
|
}
|
394
|
-
if (0 != dr->has_instruct) {
|
395
|
-
VALUE args[2];
|
396
|
-
|
397
|
-
args[0] = target;
|
398
|
-
args[1] = attrs;
|
399
|
-
rb_funcall2(dr->handler, instruct_id, 2, args);
|
400
|
-
}
|
401
398
|
dr->str = 0;
|
402
399
|
|
403
400
|
return 0;
|
@@ -519,7 +516,6 @@ read_comment(SaxDrive dr) {
|
|
519
516
|
static int
|
520
517
|
read_element(SaxDrive dr) {
|
521
518
|
VALUE name = Qnil;
|
522
|
-
VALUE attrs = Qnil;
|
523
519
|
char c;
|
524
520
|
int closed;
|
525
521
|
|
@@ -527,12 +523,18 @@ read_element(SaxDrive dr) {
|
|
527
523
|
return -1;
|
528
524
|
}
|
529
525
|
name = str2sym(dr->str);
|
526
|
+
if (dr->has_start_element) {
|
527
|
+
VALUE args[1];
|
528
|
+
|
529
|
+
args[0] = name;
|
530
|
+
rb_funcall2(dr->handler, start_element_id, 1, args);
|
531
|
+
}
|
530
532
|
if ('/' == c) {
|
531
533
|
closed = 1;
|
532
534
|
} else if ('>' == c) {
|
533
535
|
closed = 0;
|
534
536
|
} else {
|
535
|
-
if (0 != read_attrs(dr,
|
537
|
+
if (0 != read_attrs(dr, c, '/', '>', 0)) {
|
536
538
|
return -1;
|
537
539
|
}
|
538
540
|
closed = ('/' == *(dr->cur - 1));
|
@@ -544,17 +546,14 @@ read_element(SaxDrive dr) {
|
|
544
546
|
return -1;
|
545
547
|
}
|
546
548
|
}
|
547
|
-
if (
|
548
|
-
|
549
|
+
if (closed) {
|
550
|
+
if (dr->has_end_element) {
|
551
|
+
VALUE args[1];
|
549
552
|
|
550
|
-
|
551
|
-
args[1] = attrs;
|
552
|
-
rb_funcall2(dr->handler, start_element_id, 2, args);
|
553
|
-
if (closed && dr->has_end_element) {
|
553
|
+
args[0] = name;
|
554
554
|
rb_funcall2(dr->handler, end_element_id, 1, args);
|
555
555
|
}
|
556
|
-
}
|
557
|
-
if (!closed) {
|
556
|
+
} else {
|
558
557
|
if (0 != read_children(dr, 0)) {
|
559
558
|
return -1;
|
560
559
|
}
|
@@ -601,7 +600,7 @@ read_text(SaxDrive dr) {
|
|
601
600
|
}
|
602
601
|
|
603
602
|
static int
|
604
|
-
read_attrs(SaxDrive dr,
|
603
|
+
read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml) {
|
605
604
|
VALUE name = Qnil;
|
606
605
|
int is_encoding = 0;
|
607
606
|
|
@@ -618,10 +617,10 @@ read_attrs(SaxDrive dr, VALUE *attrs, char c, char termc, char term2) {
|
|
618
617
|
if ('\0' == (c = read_name_token(dr))) {
|
619
618
|
return -1;
|
620
619
|
}
|
621
|
-
if (
|
620
|
+
if (is_xml && 0 == strcmp("encoding", dr->str)) {
|
622
621
|
is_encoding = 1;
|
623
622
|
}
|
624
|
-
if (dr->
|
623
|
+
if (dr->has_attr) {
|
625
624
|
name = str2sym(dr->str);
|
626
625
|
}
|
627
626
|
if (is_white(c)) {
|
@@ -639,18 +638,17 @@ read_attrs(SaxDrive dr, VALUE *attrs, char c, char termc, char term2) {
|
|
639
638
|
dr->encoding = rb_enc_find(dr->str);
|
640
639
|
}
|
641
640
|
#endif
|
642
|
-
if (dr->
|
643
|
-
VALUE
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
}
|
641
|
+
if (dr->has_attr) {
|
642
|
+
VALUE args[2];
|
643
|
+
|
644
|
+
args[0] = name;
|
645
|
+
args[1] = rb_str_new2(dr->str);
|
648
646
|
#ifdef HAVE_RUBY_ENCODING_H
|
649
647
|
if (0 != dr->encoding) {
|
650
|
-
rb_enc_associate(
|
648
|
+
rb_enc_associate(args[1], dr->encoding);
|
651
649
|
}
|
652
650
|
#endif
|
653
|
-
|
651
|
+
rb_funcall2(dr->handler, attr_id, 2, args);
|
654
652
|
}
|
655
653
|
c = next_non_white(dr);
|
656
654
|
}
|
data/lib/ox/sax.rb
CHANGED
@@ -26,12 +26,13 @@ module Ox
|
|
26
26
|
# be made public in the subclasses. If the methods remain private they will
|
27
27
|
# not be called during parsing.
|
28
28
|
#
|
29
|
-
# def instruct(target
|
29
|
+
# def instruct(target); end
|
30
|
+
# def attr(name, value); end
|
30
31
|
# def doctype(value); end
|
31
32
|
# def comment(value); end
|
32
33
|
# def cdata(value); end
|
33
34
|
# def text(value); end
|
34
|
-
# def start_element(name
|
35
|
+
# def start_element(name); end
|
35
36
|
# def end_element(name); end
|
36
37
|
#
|
37
38
|
class Sax
|
@@ -44,7 +45,10 @@ module Ox
|
|
44
45
|
# they will not be called during parsing.
|
45
46
|
private
|
46
47
|
|
47
|
-
def instruct(target
|
48
|
+
def instruct(target)
|
49
|
+
end
|
50
|
+
|
51
|
+
def attr(name, value)
|
48
52
|
end
|
49
53
|
|
50
54
|
def doctype(value)
|
@@ -59,7 +63,7 @@ module Ox
|
|
59
63
|
def text(value)
|
60
64
|
end
|
61
65
|
|
62
|
-
def start_element(name
|
66
|
+
def start_element(name)
|
63
67
|
end
|
64
68
|
|
65
69
|
def end_element(name)
|
data/lib/ox/version.rb
CHANGED
data/test/perf_gen.rb
CHANGED
@@ -17,7 +17,14 @@ require 'ox'
|
|
17
17
|
require 'sample'
|
18
18
|
require 'test/ox/doc'
|
19
19
|
require 'files'
|
20
|
-
|
20
|
+
begin
|
21
|
+
require 'nokogiri'
|
22
|
+
rescue Exception => e
|
23
|
+
end
|
24
|
+
begin
|
25
|
+
require 'libxml'
|
26
|
+
rescue Exception => e
|
27
|
+
end
|
21
28
|
|
22
29
|
$verbose = 0
|
23
30
|
$ox_only = false
|
@@ -54,25 +61,31 @@ if files.empty?
|
|
54
61
|
data = []
|
55
62
|
obj = do_sample ? sample_doc(2) : files('..')
|
56
63
|
xml = Ox.dump(obj, :indent => 2, :opt_format => true)
|
57
|
-
gen = Ox.parse(xml)
|
58
|
-
no = Nokogiri::XML::Document.parse(xml)
|
59
64
|
File.open('sample.xml', 'w') { |f| f.write(xml) }
|
60
|
-
|
65
|
+
gen = Ox.parse(xml)
|
66
|
+
h = { :file => 'sample.xml', :xml => xml, :ox => gen }
|
67
|
+
h[:nokogiri] = Nokogiri::XML::Document.parse(xml) unless defined?(::Nokogiri).nil?
|
68
|
+
h[:libxml] = LibXML::XML::Document.string(xml) unless defined?(::LibXML).nil?
|
69
|
+
data << h
|
61
70
|
else
|
62
71
|
puts "loading and parsing #{files}\n\n"
|
63
72
|
data = files.map do |f|
|
64
73
|
xml = File.read(f)
|
65
74
|
obj = Ox.parse(xml);
|
66
75
|
gen = Ox.parse(xml)
|
67
|
-
|
68
|
-
|
76
|
+
h = { :file => f, :xml => xml, :ox => gen }
|
77
|
+
h[:nokogiri] = Nokogiri::XML::Document.parse(xml) unless defined?(::Nokogiri).nil?
|
78
|
+
h[:libxml] = LibXML::XML::Document.string(xml) unless defined?(::LibXML).nil?
|
79
|
+
h
|
69
80
|
end
|
70
81
|
end
|
71
82
|
|
72
83
|
$ox_load_time = 0
|
73
84
|
$no_load_time = 0
|
85
|
+
$lx_load_time = 0
|
74
86
|
$ox_dump_time = 0
|
75
87
|
$no_dump_time = 0
|
88
|
+
$lx_dump_time = 0
|
76
89
|
|
77
90
|
def perf_load(d)
|
78
91
|
xml = d[:xml]
|
@@ -91,14 +104,28 @@ def perf_load(d)
|
|
91
104
|
|
92
105
|
return if $ox_only
|
93
106
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
107
|
+
unless defined?(::Nokogiri).nil?
|
108
|
+
start = Time.now
|
109
|
+
(1..$iter).each do
|
110
|
+
obj = Nokogiri::XML::Document.parse(xml)
|
111
|
+
#obj = Nokogiri::XML::Document.parse(xml, nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS)
|
112
|
+
end
|
113
|
+
$no_load_time = Time.now - start
|
114
|
+
puts "Nokogiri parse #{$iter} times took #{$no_load_time} seconds."
|
115
|
+
end
|
116
|
+
|
117
|
+
unless defined?(::LibXML).nil?
|
118
|
+
start = Time.now
|
119
|
+
(1..$iter).each do
|
120
|
+
obj = LibXML::XML::Document.string(xml)
|
121
|
+
end
|
122
|
+
$lx_load_time = Time.now - start
|
123
|
+
puts "LibXML parse #{$iter} times took #{$lx_load_time} seconds."
|
98
124
|
end
|
99
|
-
|
100
|
-
puts "
|
101
|
-
puts ">>> Ox is %0.1f faster than
|
125
|
+
puts "\n"
|
126
|
+
puts ">>> Ox is %0.1f faster than Nokogiri parsing." % [$no_load_time/$ox_load_time] unless defined?(::Nokogiri).nil?
|
127
|
+
puts ">>> Ox is %0.1f faster than LibXML parsing." % [$lx_load_time/$ox_load_time] unless defined?(::LibXML).nil?
|
128
|
+
puts "\n"
|
102
129
|
end
|
103
130
|
|
104
131
|
def perf_dump(d)
|
@@ -114,14 +141,29 @@ def perf_dump(d)
|
|
114
141
|
|
115
142
|
return if $ox_only
|
116
143
|
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
144
|
+
unless defined?(::Nokogiri).nil?
|
145
|
+
obj = d[:nokogiri]
|
146
|
+
start = Time.now
|
147
|
+
(1..$iter).each do
|
148
|
+
xml = obj.to_xml(:indent => 2)
|
149
|
+
end
|
150
|
+
$no_dump_time = Time.now - start
|
151
|
+
puts "Nokogiri to_xml #{$iter} times took #{$no_dump_time} seconds."
|
152
|
+
end
|
153
|
+
|
154
|
+
unless defined?(::LibXML).nil?
|
155
|
+
obj = d[:libxml]
|
156
|
+
start = Time.now
|
157
|
+
(1..$iter).each do
|
158
|
+
xml = obj.to_s()
|
159
|
+
end
|
160
|
+
$lx_dump_time = Time.now - start
|
161
|
+
puts "LibML to_s #{$iter} times took #{$lx_dump_time} seconds."
|
121
162
|
end
|
122
|
-
|
123
|
-
puts "
|
124
|
-
puts ">>> Ox is %0.1f faster than
|
163
|
+
puts "\n"
|
164
|
+
puts ">>> Ox is %0.1f faster than Nokkgiri to_xml." % [$no_dump_time/$ox_dump_time] unless defined?(::Nokogiri).nil?
|
165
|
+
puts ">>> Ox is %0.1f faster than LibXML to_xml." % [$lx_dump_time/$ox_dump_time] unless defined?(::LibXML).nil?
|
166
|
+
puts "\n"
|
125
167
|
end
|
126
168
|
|
127
169
|
def perf_read(d)
|
@@ -137,7 +179,9 @@ def perf_read(d)
|
|
137
179
|
ox_read_time = Time.now - start
|
138
180
|
puts "Loading and parsing #{$iter} times with ox took #{ox_read_time} seconds."
|
139
181
|
|
140
|
-
if
|
182
|
+
return if $ox_only
|
183
|
+
|
184
|
+
unless defined?(::Nokogiri).nil?
|
141
185
|
start = Time.now
|
142
186
|
(1..$iter).each do
|
143
187
|
xml = File.read(filename)
|
@@ -146,7 +190,6 @@ def perf_read(d)
|
|
146
190
|
no_read_time = Time.now - start
|
147
191
|
puts "Reading and parsing #{$iter} times took #{no_read_time} seconds."
|
148
192
|
puts ">>> Ox is %0.1f faster than Nokogiri loading and parsing.\n\n" % [no_read_time/ox_read_time]
|
149
|
-
|
150
193
|
end
|
151
194
|
end
|
152
195
|
|
@@ -164,7 +207,9 @@ def perf_write(d)
|
|
164
207
|
ox_write_time = Time.now - start
|
165
208
|
puts "Ox dumping #{$iter} times with ox took #{ox_write_time} seconds."
|
166
209
|
|
167
|
-
if
|
210
|
+
return if $ox_only
|
211
|
+
|
212
|
+
unless defined?(::Nokogiri).nil?
|
168
213
|
obj = d[:nokogiri]
|
169
214
|
start = Time.now
|
170
215
|
(1..$iter).each do
|
@@ -174,7 +219,6 @@ def perf_write(d)
|
|
174
219
|
no_write_time = Time.now - start
|
175
220
|
puts "Nokogiri dumping and writing #{$iter} times took #{no_write_time} seconds."
|
176
221
|
puts ">>> Ox is %0.1f faster than Nokogiri writing.\n\n" % [no_write_time/ox_write_time]
|
177
|
-
|
178
222
|
end
|
179
223
|
end
|
180
224
|
|
data/test/perf_sax.rb
CHANGED
@@ -77,26 +77,27 @@ def create_file(filename, size)
|
|
77
77
|
end
|
78
78
|
|
79
79
|
class OxSax < ::Ox::Sax
|
80
|
-
def start_element(name, attrs); end
|
81
80
|
def error(message, line, column); puts message; end
|
82
81
|
end
|
83
82
|
|
84
83
|
class OxAllSax < OxSax
|
85
|
-
def
|
86
|
-
def
|
84
|
+
def start_element(name); end
|
85
|
+
def attr(name, value); end
|
86
|
+
def end_element(name); end
|
87
|
+
def text(value); end
|
88
|
+
def instruct(target); end
|
87
89
|
def doctype(value); end
|
88
90
|
def comment(value); end
|
89
91
|
def cdata(value); end
|
90
|
-
def text(value); end
|
91
92
|
end
|
92
93
|
|
93
94
|
unless defined?(::Nokogiri).nil?
|
94
95
|
class NoSax < Nokogiri::XML::SAX::Document
|
95
|
-
def start_element(name, attrs = []); end
|
96
96
|
def error(message); puts message; end
|
97
97
|
def warning(message); puts message; end
|
98
98
|
end
|
99
99
|
class NoAllSax < NoSax
|
100
|
+
def start_element(name, attrs = []); end
|
100
101
|
def characters(text); end
|
101
102
|
def cdata_block(string); end
|
102
103
|
def comment(string); end
|
@@ -110,9 +111,9 @@ end
|
|
110
111
|
unless defined?(::LibXML).nil?
|
111
112
|
class LxSax
|
112
113
|
include LibXML::XML::SaxParser::Callbacks
|
113
|
-
def on_start_element(element, attributes); end
|
114
114
|
end
|
115
115
|
class LxAllSax < LxSax
|
116
|
+
def on_start_element(element, attributes); end
|
116
117
|
def on_cdata_block(cdata); end
|
117
118
|
def on_characters(chars); end
|
118
119
|
def on_comment(msg); end
|
@@ -178,7 +179,7 @@ end
|
|
178
179
|
|
179
180
|
def perf_fileio()
|
180
181
|
puts "\n"
|
181
|
-
puts "A #{$filesize} KByte XML file was parsed #{$iter} for this test."
|
182
|
+
puts "A #{$filesize} KByte XML file was parsed #{$iter} times for this test."
|
182
183
|
puts "\n"
|
183
184
|
start = Time.now
|
184
185
|
handler = $all_cbs ? OxAllSax.new() : OxSax.new()
|
data/test/sax_test.rb
CHANGED
@@ -27,8 +27,12 @@ class StartSax < ::Ox::Sax
|
|
27
27
|
@calls = []
|
28
28
|
end
|
29
29
|
|
30
|
-
def start_element(name
|
31
|
-
@calls << [:start_element, name
|
30
|
+
def start_element(name)
|
31
|
+
@calls << [:start_element, name]
|
32
|
+
end
|
33
|
+
|
34
|
+
def attr(name, value)
|
35
|
+
@calls << [:attr, name, value]
|
32
36
|
end
|
33
37
|
end
|
34
38
|
|
@@ -37,8 +41,8 @@ class AllSax < StartSax
|
|
37
41
|
super
|
38
42
|
end
|
39
43
|
|
40
|
-
def instruct(target
|
41
|
-
@calls << [:instruct, target
|
44
|
+
def instruct(target)
|
45
|
+
@calls << [:instruct, target]
|
42
46
|
end
|
43
47
|
|
44
48
|
def doctype(value)
|
@@ -75,7 +79,7 @@ class Func < ::Test::Unit::TestCase
|
|
75
79
|
w.close
|
76
80
|
Ox.sax_parse(handler, input)
|
77
81
|
assert_equal(handler.calls,
|
78
|
-
[[:start_element, :top
|
82
|
+
[[:start_element, :top],
|
79
83
|
[:end_element, :top]])
|
80
84
|
end
|
81
85
|
|
@@ -84,7 +88,7 @@ class Func < ::Test::Unit::TestCase
|
|
84
88
|
input = IO.open(IO.sysopen('basic.xml'))
|
85
89
|
Ox.sax_parse(handler, input)
|
86
90
|
assert_equal(handler.calls,
|
87
|
-
[[:start_element, :top
|
91
|
+
[[:start_element, :top],
|
88
92
|
[:end_element, :top]])
|
89
93
|
end
|
90
94
|
|
@@ -96,7 +100,7 @@ class Func < ::Test::Unit::TestCase
|
|
96
100
|
end
|
97
101
|
|
98
102
|
def test_sax_instruct_simple
|
99
|
-
parse_compare(%{<?xml?>}, [[:instruct, 'xml'
|
103
|
+
parse_compare(%{<?xml?>}, [[:instruct, 'xml']])
|
100
104
|
end
|
101
105
|
|
102
106
|
def test_sax_instruct_blank
|
@@ -105,34 +109,40 @@ class Func < ::Test::Unit::TestCase
|
|
105
109
|
|
106
110
|
def test_sax_instruct_attrs
|
107
111
|
parse_compare(%{<?xml version="1.0" encoding="UTF-8"?>},
|
108
|
-
[[:instruct, 'xml',
|
112
|
+
[[:instruct, 'xml'],
|
113
|
+
[:attr, :version, "1.0"],
|
114
|
+
[:attr, :encoding, "UTF-8"]])
|
109
115
|
end
|
110
116
|
|
111
117
|
def test_sax_instruct_loose
|
112
118
|
parse_compare(%{<? xml
|
113
119
|
version = "1.0"
|
114
120
|
encoding = "UTF-8" ?>},
|
115
|
-
[[:instruct, 'xml',
|
121
|
+
[[:instruct, 'xml'],
|
122
|
+
[:attr, :version, "1.0"],
|
123
|
+
[:attr, :encoding, "UTF-8"]])
|
116
124
|
end
|
117
125
|
|
118
126
|
def test_sax_element_simple
|
119
127
|
parse_compare(%{<top/>},
|
120
|
-
[[:start_element, :top
|
128
|
+
[[:start_element, :top],
|
121
129
|
[:end_element, :top]])
|
122
130
|
end
|
123
131
|
|
124
132
|
def test_sax_element_attrs
|
125
133
|
parse_compare(%{<top x="57" y="42"/>},
|
126
|
-
[[:start_element, :top
|
134
|
+
[[:start_element, :top],
|
135
|
+
[:attr, :x, "57"],
|
136
|
+
[:attr, :y, "42"],
|
127
137
|
[:end_element, :top]])
|
128
138
|
end
|
129
139
|
|
130
140
|
def test_sax_two_top
|
131
141
|
parse_compare(%{<top/><top/>},
|
132
|
-
[[:start_element, :top
|
142
|
+
[[:start_element, :top],
|
133
143
|
[:end_element, :top],
|
134
144
|
[:error, "invalid format, multiple top level elements", 1, 9],
|
135
|
-
[:start_element, :top
|
145
|
+
[:start_element, :top],
|
136
146
|
[:end_element, :top]])
|
137
147
|
|
138
148
|
|
@@ -146,10 +156,11 @@ encoding = "UTF-8" ?>},
|
|
146
156
|
</child>
|
147
157
|
</top>
|
148
158
|
},
|
149
|
-
[[:instruct, 'xml'
|
150
|
-
[:
|
151
|
-
[:start_element, :
|
152
|
-
[:start_element, :
|
159
|
+
[[:instruct, 'xml'],
|
160
|
+
[:attr, :version, "1.0"],
|
161
|
+
[:start_element, :top],
|
162
|
+
[:start_element, :child],
|
163
|
+
[:start_element, :grandchild],
|
153
164
|
[:end_element, :grandchild],
|
154
165
|
[:end_element, :child],
|
155
166
|
[:end_element, :top],
|
@@ -158,10 +169,11 @@ encoding = "UTF-8" ?>},
|
|
158
169
|
|
159
170
|
def test_sax_nested1_tight
|
160
171
|
parse_compare(%{<?xml version="1.0"?><top><child><grandchild/></child></top>},
|
161
|
-
[[:instruct, 'xml'
|
162
|
-
[:
|
163
|
-
[:start_element, :
|
164
|
-
[:start_element, :
|
172
|
+
[[:instruct, 'xml'],
|
173
|
+
[:attr, :version, "1.0"],
|
174
|
+
[:start_element, :top],
|
175
|
+
[:start_element, :child],
|
176
|
+
[:start_element, :grandchild],
|
165
177
|
[:end_element, :grandchild],
|
166
178
|
[:end_element, :child],
|
167
179
|
[:end_element, :top],
|
@@ -175,10 +187,11 @@ encoding = "UTF-8" ?>},
|
|
175
187
|
<grandchild/>
|
176
188
|
</parent>
|
177
189
|
</top>},
|
178
|
-
[[:instruct, 'xml'
|
179
|
-
[:
|
180
|
-
[:start_element, :
|
181
|
-
[:start_element, :
|
190
|
+
[[:instruct, 'xml'],
|
191
|
+
[:attr, :version, "1.0"],
|
192
|
+
[:start_element, :top],
|
193
|
+
[:start_element, :child],
|
194
|
+
[:start_element, :grandchild],
|
182
195
|
[:end_element, :grandchild],
|
183
196
|
[:error, "invalid format, element start and end names do not match", 5, 12]
|
184
197
|
])
|
@@ -196,16 +209,17 @@ encoding = "UTF-8" ?>},
|
|
196
209
|
</child>
|
197
210
|
</top>
|
198
211
|
},
|
199
|
-
[[:instruct, 'xml'
|
200
|
-
[:
|
201
|
-
[:start_element, :
|
202
|
-
[:start_element, :
|
212
|
+
[[:instruct, 'xml'],
|
213
|
+
[:attr, :version, "1.0"],
|
214
|
+
[:start_element, :top],
|
215
|
+
[:start_element, :child],
|
216
|
+
[:start_element, :grandchild],
|
203
217
|
[:end_element, :grandchild],
|
204
218
|
[:end_element, :child],
|
205
|
-
[:start_element, :child
|
206
|
-
[:start_element, :grandchild
|
219
|
+
[:start_element, :child],
|
220
|
+
[:start_element, :grandchild],
|
207
221
|
[:end_element, :grandchild],
|
208
|
-
[:start_element, :grandchild
|
222
|
+
[:start_element, :grandchild],
|
209
223
|
[:end_element, :grandchild],
|
210
224
|
[:end_element, :child],
|
211
225
|
[:end_element, :top],
|
@@ -216,8 +230,8 @@ encoding = "UTF-8" ?>},
|
|
216
230
|
<top>
|
217
231
|
<child/>
|
218
232
|
},
|
219
|
-
[[:start_element, :top
|
220
|
-
[:start_element, :child
|
233
|
+
[[:start_element, :top],
|
234
|
+
[:start_element, :child],
|
221
235
|
[:end_element, :child],
|
222
236
|
[:error, "invalid format, element not terminated", 4, 1]
|
223
237
|
])
|
@@ -225,7 +239,7 @@ encoding = "UTF-8" ?>},
|
|
225
239
|
|
226
240
|
def test_sax_text
|
227
241
|
parse_compare(%{<top>This is some text.</top>},
|
228
|
-
[[:start_element, :top
|
242
|
+
[[:start_element, :top],
|
229
243
|
[:text, "This is some text."],
|
230
244
|
[:end_element, :top]
|
231
245
|
])
|
@@ -233,7 +247,7 @@ encoding = "UTF-8" ?>},
|
|
233
247
|
|
234
248
|
def test_sax_text_no_term
|
235
249
|
parse_compare(%{<top>This is some text.},
|
236
|
-
[[:start_element, :top
|
250
|
+
[[:start_element, :top],
|
237
251
|
[:error, "invalid format, text terminated unexpectedly", 1, 24],
|
238
252
|
])
|
239
253
|
end
|
@@ -244,9 +258,10 @@ encoding = "UTF-8" ?>},
|
|
244
258
|
<!DOCTYPE top PUBLIC "top.dtd">
|
245
259
|
<top/>
|
246
260
|
},
|
247
|
-
[[:instruct, 'xml'
|
261
|
+
[[:instruct, 'xml'],
|
262
|
+
[:attr, :version, "1.0"],
|
248
263
|
[:doctype, ' top PUBLIC "top.dtd"'],
|
249
|
-
[:start_element, :top
|
264
|
+
[:start_element, :top],
|
250
265
|
[:end_element, :top]])
|
251
266
|
end
|
252
267
|
|
@@ -255,8 +270,9 @@ encoding = "UTF-8" ?>},
|
|
255
270
|
<top/>
|
256
271
|
<!DOCTYPE top PUBLIC "top.dtd">
|
257
272
|
},
|
258
|
-
[[:instruct, 'xml'
|
259
|
-
[:
|
273
|
+
[[:instruct, 'xml'],
|
274
|
+
[:attr, :version, "1.0"],
|
275
|
+
[:start_element, :top],
|
260
276
|
[:end_element, :top],
|
261
277
|
[:error, "invalid format, DOCTYPE can not come after an element", 3, 11],
|
262
278
|
[:doctype, ' top PUBLIC "top.dtd"']])
|
@@ -270,8 +286,9 @@ encoding = "UTF-8" ?>},
|
|
270
286
|
},
|
271
287
|
[[:doctype, " top PUBLIC \"top.dtd\""],
|
272
288
|
[:error, "invalid format, instruction must come before elements", 3, 3],
|
273
|
-
[:instruct, "xml"
|
274
|
-
[:
|
289
|
+
[:instruct, "xml"],
|
290
|
+
[:attr, :version, "1.0"],
|
291
|
+
[:start_element, :top],
|
275
292
|
[:end_element, :top]])
|
276
293
|
end
|
277
294
|
|
@@ -280,9 +297,10 @@ encoding = "UTF-8" ?>},
|
|
280
297
|
<!--First comment.-->
|
281
298
|
<top>Before<!--Nested comment.-->After</top>
|
282
299
|
},
|
283
|
-
[[:instruct, 'xml'
|
300
|
+
[[:instruct, 'xml'],
|
301
|
+
[:attr, :version, "1.0"],
|
284
302
|
[:comment, 'First comment.'],
|
285
|
-
[:start_element, :top
|
303
|
+
[:start_element, :top],
|
286
304
|
[:text, 'Before'],
|
287
305
|
[:comment, 'Nested comment.'],
|
288
306
|
[:text, 'After'],
|
@@ -294,10 +312,11 @@ encoding = "UTF-8" ?>},
|
|
294
312
|
<!--First comment.--
|
295
313
|
<top/>
|
296
314
|
},
|
297
|
-
[[:instruct, 'xml'
|
315
|
+
[[:instruct, 'xml'],
|
316
|
+
[:attr, :version, "1.0"],
|
298
317
|
[:error, "invalid format, comment terminated unexpectedly", 3, 1], # continue on
|
299
318
|
[:comment, 'First comment.'],
|
300
|
-
[:start_element, :top
|
319
|
+
[:start_element, :top],
|
301
320
|
[:end_element, :top]])
|
302
321
|
end
|
303
322
|
|
@@ -307,8 +326,9 @@ encoding = "UTF-8" ?>},
|
|
307
326
|
<![CDATA[This is CDATA.]]>
|
308
327
|
</top>
|
309
328
|
},
|
310
|
-
[[:instruct, 'xml'
|
311
|
-
[:
|
329
|
+
[[:instruct, 'xml'],
|
330
|
+
[:attr, :version, "1.0"],
|
331
|
+
[:start_element, :top],
|
312
332
|
[:cdata, 'This is CDATA.'],
|
313
333
|
[:end_element, :top]])
|
314
334
|
end
|
@@ -319,8 +339,9 @@ encoding = "UTF-8" ?>},
|
|
319
339
|
<![CDATA[This is CDATA.]]
|
320
340
|
</top>
|
321
341
|
},
|
322
|
-
[[:instruct, 'xml'
|
323
|
-
[:
|
342
|
+
[[:instruct, 'xml'],
|
343
|
+
[:attr, :version, "1.0"],
|
344
|
+
[:start_element, :top],
|
324
345
|
[:error, "invalid format, cdata terminated unexpectedly", 5, 1]])
|
325
346
|
end
|
326
347
|
|
@@ -340,27 +361,45 @@ encoding = "UTF-8" ?>},
|
|
340
361
|
</row>
|
341
362
|
</table>
|
342
363
|
},
|
343
|
-
[[:instruct,
|
344
|
-
[:
|
364
|
+
[[:instruct, "xml"],
|
365
|
+
[:attr, :version, "1.0"],
|
366
|
+
[:instruct, "ox"],
|
367
|
+
[:attr, :version, "1.0"],
|
368
|
+
[:attr, :mode, "object"],
|
369
|
+
[:attr, :circular, "no"],
|
370
|
+
[:attr, :xsd_date, "no"],
|
345
371
|
[:doctype, " table PUBLIC \"-//ox//DTD TABLE 1.0//EN\" \"http://www.ohler.com/DTDs/TestTable-1.0.dtd\""],
|
346
|
-
[:start_element, :table
|
347
|
-
[:start_element, :row
|
348
|
-
[:
|
372
|
+
[:start_element, :table],
|
373
|
+
[:start_element, :row],
|
374
|
+
[:attr, :id, "00004"],
|
375
|
+
[:start_element, :cell],
|
376
|
+
[:attr, :id, "A"],
|
377
|
+
[:attr, :type, "Fixnum"],
|
349
378
|
[:text, "1234"],
|
350
379
|
[:end_element, :cell],
|
351
|
-
[:start_element, :cell
|
380
|
+
[:start_element, :cell],
|
381
|
+
[:attr, :id, "B"],
|
382
|
+
[:attr, :type, "String"],
|
352
383
|
[:text, "A string."],
|
353
384
|
[:end_element, :cell],
|
354
|
-
[:start_element, :cell
|
385
|
+
[:start_element, :cell],
|
386
|
+
[:attr, :id, "C"],
|
387
|
+
[:attr, :type, "String"],
|
355
388
|
[:text, "This is a longer string that stretches over a larger number of characters."],
|
356
389
|
[:end_element, :cell],
|
357
|
-
[:start_element, :cell
|
390
|
+
[:start_element, :cell],
|
391
|
+
[:attr, :id, "D"],
|
392
|
+
[:attr, :type, "Float"],
|
358
393
|
[:text, "-12.345"],
|
359
394
|
[:end_element, :cell],
|
360
|
-
[:start_element, :cell
|
395
|
+
[:start_element, :cell],
|
396
|
+
[:attr, :id, "E"],
|
397
|
+
[:attr, :type, "Date"],
|
361
398
|
[:text, "2011-09-18 23:07:26 +0900"],
|
362
399
|
[:end_element, :cell],
|
363
|
-
[:start_element, :cell
|
400
|
+
[:start_element, :cell],
|
401
|
+
[:attr, :id, "F"],
|
402
|
+
[:attr, :type, "Image"],
|
364
403
|
[:cdata, "xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00xx00"],
|
365
404
|
[:end_element, :cell],
|
366
405
|
[:end_element, :row],
|
@@ -371,8 +410,10 @@ encoding = "UTF-8" ?>},
|
|
371
410
|
parse_compare(%{<?xml version="1.0" encoding="UTF-8"?>
|
372
411
|
<top>ピーター</top>
|
373
412
|
},
|
374
|
-
[[:instruct,
|
375
|
-
[:
|
413
|
+
[[:instruct, "xml"],
|
414
|
+
[:attr, :version, "1.0"],
|
415
|
+
[:attr, :encoding, "UTF-8"],
|
416
|
+
[:start_element, :top],
|
376
417
|
[:text, 'ピーター'],
|
377
418
|
[:end_element, :top]])
|
378
419
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 1
|
7
7
|
- 3
|
8
|
-
-
|
9
|
-
version: 1.3.
|
8
|
+
- 2
|
9
|
+
version: 1.3.2
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Peter Ohler
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-09-
|
17
|
+
date: 2011-09-21 00:00:00 +09:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|