ox 1.3.2 → 1.3.3
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of ox might be problematic. Click here for more details.
- data/README.md +51 -8
- data/ext/ox/ox.c +22 -5
- data/ext/ox/ox.h +3 -1
- data/ext/ox/sax.c +75 -7
- data/lib/ox/document.rb +10 -0
- data/lib/ox/version.rb +1 -1
- data/test/perf_sax.rb +0 -1
- data/test/sax_example.rb +37 -0
- data/test/sax_test.rb +15 -2
- metadata +4 -3
data/README.md
CHANGED
@@ -26,9 +26,12 @@ A fast XML parser and Object marshaller as a Ruby gem.
|
|
26
26
|
|
27
27
|
## <a name="release">Release Notes</a>
|
28
28
|
|
29
|
-
### Release 1.3.2
|
29
|
+
### Release 1.3.3
|
30
|
+
|
31
|
+
- Added an option to the SAX parser to convert special characters.
|
32
|
+
|
33
|
+
- The default options encoding is now used as the default for SAX parsing.
|
30
34
|
|
31
|
-
- Changed SAX parser API for element and instruction attributes
|
32
35
|
|
33
36
|
## <a name="description">Description</a>
|
34
37
|
|
@@ -94,26 +97,66 @@ Ox is compatible with Ruby 1.8.7, 1.9.2, JRuby, and RBX.
|
|
94
97
|
### Generic XML Writing and Parsing:
|
95
98
|
|
96
99
|
require 'ox'
|
97
|
-
|
100
|
+
|
98
101
|
doc = Ox::Document.new(:version => '1.0')
|
99
|
-
|
102
|
+
|
100
103
|
top = Ox::Element.new('top')
|
101
104
|
top[:name] = 'sample'
|
102
105
|
doc << top
|
103
|
-
|
106
|
+
|
104
107
|
mid = Ox::Element.new('middle')
|
105
108
|
mid[:name] = 'second'
|
106
109
|
top << mid
|
107
|
-
|
110
|
+
|
108
111
|
bot = Ox::Element.new('bottom')
|
109
112
|
bot[:name] = 'third'
|
110
113
|
mid << bot
|
111
|
-
|
114
|
+
|
112
115
|
xml = Ox.dump(doc)
|
113
|
-
|
116
|
+
|
117
|
+
# xml =
|
118
|
+
# <top name="sample">
|
119
|
+
# <middle name="second">
|
120
|
+
# <bottom name="third"/>
|
121
|
+
# </middle>
|
122
|
+
# </top>
|
123
|
+
|
114
124
|
doc2 = Ox.parse(xml)
|
115
125
|
puts "Same? #{doc == doc2}"
|
126
|
+
# true
|
127
|
+
|
128
|
+
### SAX XML Parsing:
|
116
129
|
|
130
|
+
require 'stringio'
|
131
|
+
require 'ox'
|
132
|
+
|
133
|
+
class Sample < ::Ox::Sax
|
134
|
+
def start_element(name); puts "start: #{name}"; end
|
135
|
+
def end_element(name); puts "end: #{name}"; end
|
136
|
+
def attr(name, value); puts " #{name} => #{value}"; end
|
137
|
+
def text(value); puts "text #{value}"; end
|
138
|
+
end
|
139
|
+
|
140
|
+
io = StringIO.new(%{
|
141
|
+
<top name="sample">
|
142
|
+
<middle name="second">
|
143
|
+
<bottom name="third"/>
|
144
|
+
</middle>
|
145
|
+
</top>
|
146
|
+
})
|
147
|
+
|
148
|
+
handler = Sample.new()
|
149
|
+
Ox.sax_parse(handler, io)
|
150
|
+
# outputs
|
151
|
+
# start: top
|
152
|
+
# name => sample
|
153
|
+
# start: middle
|
154
|
+
# name => second
|
155
|
+
# start: bottom
|
156
|
+
# name => third
|
157
|
+
# end: bottom
|
158
|
+
# end: middle
|
159
|
+
# end: top
|
117
160
|
|
118
161
|
### Object XML format
|
119
162
|
|
data/ext/ox/ox.c
CHANGED
@@ -94,6 +94,7 @@ VALUE trace_sym;
|
|
94
94
|
VALUE strict_sym;
|
95
95
|
VALUE with_dtd_sym;
|
96
96
|
VALUE with_instruct_sym;
|
97
|
+
VALUE convert_special_sym;
|
97
98
|
VALUE with_xml_sym;
|
98
99
|
VALUE empty_string;
|
99
100
|
VALUE zero_fixnum;
|
@@ -111,7 +112,7 @@ Cache symbol_cache = 0;
|
|
111
112
|
Cache class_cache = 0;
|
112
113
|
Cache attr_cache = 0;
|
113
114
|
|
114
|
-
|
115
|
+
struct _Options default_options = {
|
115
116
|
{ '\0' }, // encoding
|
116
117
|
2, // indent
|
117
118
|
0, // trace
|
@@ -444,16 +445,31 @@ load_file(int argc, VALUE *argv, VALUE self) {
|
|
444
445
|
return load(xml, argc - 1, argv + 1, self);
|
445
446
|
}
|
446
447
|
|
447
|
-
/* call-seq: sax_parse(handler, io)
|
448
|
+
/* call-seq: sax_parse(handler, io, options)
|
448
449
|
*
|
449
450
|
* Parses an IO stream or file containing an XML document. Raises an exception
|
450
451
|
* if the XML is malformed or the classes specified are not valid.
|
451
452
|
* @param [Ox::Sax] handler SAX (responds to OX::Sax methods) like handler
|
452
453
|
* @param [IO|String] io IO Object to read from
|
454
|
+
* @param [Hash] options parse options
|
455
|
+
* @param [true|false] :convert_special flag indicating whether special characters like &lt; are converted
|
453
456
|
*/
|
454
457
|
static VALUE
|
455
|
-
sax_parse(
|
456
|
-
|
458
|
+
sax_parse(int argc, VALUE *argv, VALUE self) {
|
459
|
+
int convert = 0;
|
460
|
+
|
461
|
+
if (argc < 2) {
|
462
|
+
rb_raise(rb_eArgError, "Wrong number of arguments to sax_parse.\n");
|
463
|
+
}
|
464
|
+
if (3 <= argc && rb_cHash == rb_obj_class(argv[2])) {
|
465
|
+
VALUE h = argv[2];
|
466
|
+
VALUE v;
|
467
|
+
|
468
|
+
if (Qnil != (v = rb_hash_lookup(h, convert_special_sym))) {
|
469
|
+
convert = (Qtrue == v);
|
470
|
+
}
|
471
|
+
}
|
472
|
+
ox_sax_parse(argv[0], argv[1], convert);
|
457
473
|
|
458
474
|
return Qnil;
|
459
475
|
}
|
@@ -607,7 +623,7 @@ void Init_ox() {
|
|
607
623
|
rb_define_module_function(Ox, "parse_obj", to_obj, 1);
|
608
624
|
rb_define_module_function(Ox, "parse", to_gen, 1);
|
609
625
|
rb_define_module_function(Ox, "load", load_str, -1);
|
610
|
-
rb_define_module_function(Ox, "sax_parse", sax_parse, 2);
|
626
|
+
rb_define_module_function(Ox, "sax_parse", sax_parse, -1);
|
611
627
|
|
612
628
|
rb_define_module_function(Ox, "to_xml", dump, -1);
|
613
629
|
rb_define_module_function(Ox, "dump", dump, -1);
|
@@ -668,6 +684,7 @@ void Init_ox() {
|
|
668
684
|
with_dtd_sym = ID2SYM(rb_intern("with_dtd")); rb_ary_push(keep, with_dtd_sym);
|
669
685
|
with_instruct_sym = ID2SYM(rb_intern("with_instructions")); rb_ary_push(keep, with_instruct_sym);
|
670
686
|
with_xml_sym = ID2SYM(rb_intern("with_xml")); rb_ary_push(keep, with_xml_sym);
|
687
|
+
convert_special_sym = ID2SYM(rb_intern("convert_special")); rb_ary_push(keep, convert_special_sym);
|
671
688
|
|
672
689
|
empty_string = rb_str_new2(""); rb_ary_push(keep, empty_string);
|
673
690
|
zero_fixnum = INT2NUM(0); rb_ary_push(keep, zero_fixnum);
|
data/ext/ox/ox.h
CHANGED
@@ -198,11 +198,13 @@ typedef struct _Options {
|
|
198
198
|
extern VALUE parse(char *xml, ParseCallbacks pcb, char **endp, int trace, Effort effort);
|
199
199
|
extern void _raise_error(const char *msg, const char *xml, const char *current, const char* file, int line);
|
200
200
|
|
201
|
-
extern void ox_sax_parse(VALUE handler, VALUE io);
|
201
|
+
extern void ox_sax_parse(VALUE handler, VALUE io, int convert);
|
202
202
|
|
203
203
|
extern char* write_obj_to_str(VALUE obj, Options copts);
|
204
204
|
extern void write_obj_to_file(VALUE obj, const char *path, Options copts);
|
205
205
|
|
206
|
+
extern struct _Options default_options;
|
207
|
+
|
206
208
|
extern VALUE Ox;
|
207
209
|
|
208
210
|
extern ID at_id;
|
data/ext/ox/sax.c
CHANGED
@@ -51,6 +51,7 @@ typedef struct _SaxDrive {
|
|
51
51
|
int col;
|
52
52
|
VALUE handler;
|
53
53
|
int (*read_func)(struct _SaxDrive *dr);
|
54
|
+
int convert_special;
|
54
55
|
union {
|
55
56
|
int fd;
|
56
57
|
VALUE io;
|
@@ -69,7 +70,7 @@ typedef struct _SaxDrive {
|
|
69
70
|
#endif
|
70
71
|
} *SaxDrive;
|
71
72
|
|
72
|
-
static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io);
|
73
|
+
static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert);
|
73
74
|
static void sax_drive_cleanup(SaxDrive dr);
|
74
75
|
static int sax_drive_read(SaxDrive dr);
|
75
76
|
static void sax_drive_error(SaxDrive dr, const char *msg, int critical);
|
@@ -84,6 +85,7 @@ static int read_text(SaxDrive dr);
|
|
84
85
|
static int read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml);
|
85
86
|
static char read_name_token(SaxDrive dr);
|
86
87
|
static int read_quoted_value(SaxDrive dr);
|
88
|
+
static int collapse_special(char *str);
|
87
89
|
|
88
90
|
static VALUE io_cb(VALUE rdr);
|
89
91
|
static int read_from_io(SaxDrive dr);
|
@@ -156,10 +158,10 @@ str2sym(const char *str) {
|
|
156
158
|
|
157
159
|
|
158
160
|
void
|
159
|
-
ox_sax_parse(VALUE handler, VALUE io) {
|
161
|
+
ox_sax_parse(VALUE handler, VALUE io, int convert) {
|
160
162
|
struct _SaxDrive dr;
|
161
163
|
|
162
|
-
sax_drive_init(&dr, handler, io);
|
164
|
+
sax_drive_init(&dr, handler, io, convert);
|
163
165
|
#if 0
|
164
166
|
printf("*** sax_parse with these flags\n");
|
165
167
|
printf(" has_instruct = %s\n", dr.has_instruct ? "true" : "false");
|
@@ -177,7 +179,7 @@ ox_sax_parse(VALUE handler, VALUE io) {
|
|
177
179
|
}
|
178
180
|
|
179
181
|
static void
|
180
|
-
sax_drive_init(SaxDrive dr, VALUE handler, VALUE io) {
|
182
|
+
sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert) {
|
181
183
|
if (rb_respond_to(io, readpartial_id)) {
|
182
184
|
VALUE rfd;
|
183
185
|
|
@@ -200,6 +202,7 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io) {
|
|
200
202
|
dr->line = 1;
|
201
203
|
dr->col = 0;
|
202
204
|
dr->handler = handler;
|
205
|
+
dr->convert_special = convert;
|
203
206
|
dr->has_instruct = rb_respond_to(handler, instruct_id);
|
204
207
|
dr->has_attr = rb_respond_to(handler, attr_id);
|
205
208
|
dr->has_doctype = rb_respond_to(handler, doctype_id);
|
@@ -210,7 +213,11 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io) {
|
|
210
213
|
dr->has_end_element = rb_respond_to(handler, end_element_id);
|
211
214
|
dr->has_error = rb_respond_to(handler, error_id);
|
212
215
|
#ifdef HAVE_RUBY_ENCODING_H
|
213
|
-
|
216
|
+
if ('\0' == *default_options.encoding) {
|
217
|
+
dr->encoding = 0;
|
218
|
+
} else {
|
219
|
+
dr->encoding = rb_enc_find(default_options.encoding);
|
220
|
+
}
|
214
221
|
#endif
|
215
222
|
}
|
216
223
|
|
@@ -586,8 +593,13 @@ read_text(SaxDrive dr) {
|
|
586
593
|
}
|
587
594
|
*(dr->cur - 1) = '\0';
|
588
595
|
if (dr->has_text) {
|
589
|
-
VALUE
|
590
|
-
|
596
|
+
VALUE args[1];
|
597
|
+
|
598
|
+
if (dr->convert_special) {
|
599
|
+
if (0 != collapse_special(dr->str) && 0 != strchr(dr->str, '&')) {
|
600
|
+
sax_drive_error(dr, "invalid format, special character does not end with a semicolon", 0);
|
601
|
+
}
|
602
|
+
}
|
591
603
|
args[0] = rb_str_new2(dr->str);
|
592
604
|
#ifdef HAVE_RUBY_ENCODING_H
|
593
605
|
if (0 != dr->encoding) {
|
@@ -642,6 +654,9 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml) {
|
|
642
654
|
VALUE args[2];
|
643
655
|
|
644
656
|
args[0] = name;
|
657
|
+
if (0 != collapse_special(dr->str) && 0 != strchr(dr->str, '&')) {
|
658
|
+
sax_drive_error(dr, "invalid format, special character does not end with a semicolon", 0);
|
659
|
+
}
|
645
660
|
args[1] = rb_str_new2(dr->str);
|
646
661
|
#ifdef HAVE_RUBY_ENCODING_H
|
647
662
|
if (0 != dr->encoding) {
|
@@ -758,3 +773,56 @@ read_from_fd(SaxDrive dr) {
|
|
758
773
|
}
|
759
774
|
return 0;
|
760
775
|
}
|
776
|
+
|
777
|
+
|
778
|
+
static int
|
779
|
+
collapse_special(char *str) {
|
780
|
+
char *s = str;
|
781
|
+
char *b = str;
|
782
|
+
|
783
|
+
while ('\0' != *s) {
|
784
|
+
if ('&' == *s) {
|
785
|
+
int c;
|
786
|
+
char *end;
|
787
|
+
|
788
|
+
s++;
|
789
|
+
if ('#' == *s) {
|
790
|
+
s++;
|
791
|
+
c = (int)strtol(s, &end, 10);
|
792
|
+
if (';' != *end) {
|
793
|
+
return EDOM;
|
794
|
+
}
|
795
|
+
s = end + 1;
|
796
|
+
} else if (0 == strncasecmp(s, "lt;", 3)) {
|
797
|
+
c = '<';
|
798
|
+
s += 3;
|
799
|
+
} else if (0 == strncasecmp(s, "gt;", 3)) {
|
800
|
+
c = '>';
|
801
|
+
s += 3;
|
802
|
+
} else if (0 == strncasecmp(s, "amp;", 4)) {
|
803
|
+
c = '&';
|
804
|
+
s += 4;
|
805
|
+
} else if (0 == strncasecmp(s, "quot;", 5)) {
|
806
|
+
c = '"';
|
807
|
+
s += 5;
|
808
|
+
} else if (0 == strncasecmp(s, "apos;", 5)) {
|
809
|
+
c = '\'';
|
810
|
+
s += 5;
|
811
|
+
} else {
|
812
|
+
c = '?';
|
813
|
+
while (';' != *s++) {
|
814
|
+
if ('\0' == *s) {
|
815
|
+
return EDOM;
|
816
|
+
}
|
817
|
+
}
|
818
|
+
s++;
|
819
|
+
}
|
820
|
+
*b++ = (char)c;
|
821
|
+
} else {
|
822
|
+
*b++ = *s++;
|
823
|
+
}
|
824
|
+
}
|
825
|
+
*b = '\0';
|
826
|
+
|
827
|
+
return 0;
|
828
|
+
}
|
data/lib/ox/document.rb
CHANGED
@@ -16,5 +16,15 @@ module Ox
|
|
16
16
|
@attributes[:standalone] = prolog[:standalone] unless prolog[:standalone].nil?
|
17
17
|
end
|
18
18
|
|
19
|
+
# Returns the first Element in the document.
|
20
|
+
def root()
|
21
|
+
unless @nodes.nil?
|
22
|
+
@nodes.each do |n|
|
23
|
+
return n if n.is_a?(::Ox::Element)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
nil
|
27
|
+
end
|
28
|
+
|
19
29
|
end # Document
|
20
30
|
end # Ox
|
data/lib/ox/version.rb
CHANGED
data/test/perf_sax.rb
CHANGED
data/test/sax_example.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
#!/usr/bin/env ruby -wW1
|
2
|
+
|
3
|
+
$: << '../lib'
|
4
|
+
$: << '../ext'
|
5
|
+
|
6
|
+
require 'stringio'
|
7
|
+
require 'ox'
|
8
|
+
|
9
|
+
class Sample < ::Ox::Sax
|
10
|
+
def start_element(name); puts "start: #{name}"; end
|
11
|
+
def end_element(name); puts "end: #{name}"; end
|
12
|
+
def attr(name, value); puts " #{name} => #{value}"; end
|
13
|
+
def text(value); puts "text #{value}"; end
|
14
|
+
end
|
15
|
+
|
16
|
+
io = StringIO.new(%{
|
17
|
+
<top name="sample">
|
18
|
+
<middle name="second">
|
19
|
+
<bottom name="third"/>
|
20
|
+
</middle>
|
21
|
+
</top>
|
22
|
+
})
|
23
|
+
|
24
|
+
handler = Sample.new()
|
25
|
+
Ox.sax_parse(handler, io)
|
26
|
+
|
27
|
+
# outputs
|
28
|
+
# start: top
|
29
|
+
# name => sample
|
30
|
+
# start: middle
|
31
|
+
# name => second
|
32
|
+
# start: bottom
|
33
|
+
# name => third
|
34
|
+
# end: bottom
|
35
|
+
# end: middle
|
36
|
+
# end: top
|
37
|
+
|
data/test/sax_test.rb
CHANGED
@@ -92,10 +92,14 @@ class Func < ::Test::Unit::TestCase
|
|
92
92
|
[:end_element, :top]])
|
93
93
|
end
|
94
94
|
|
95
|
-
def parse_compare(xml, expected, handler_class=AllSax)
|
95
|
+
def parse_compare(xml, expected, handler_class=AllSax, special=false)
|
96
96
|
handler = handler_class.new()
|
97
97
|
input = StringIO.new(xml)
|
98
|
-
|
98
|
+
if special
|
99
|
+
Ox.sax_parse(handler, input, :convert_special => true)
|
100
|
+
else
|
101
|
+
Ox.sax_parse(handler, input)
|
102
|
+
end
|
99
103
|
assert_equal(expected, handler.calls)
|
100
104
|
end
|
101
105
|
|
@@ -245,6 +249,15 @@ encoding = "UTF-8" ?>},
|
|
245
249
|
])
|
246
250
|
end
|
247
251
|
|
252
|
+
def test_sax_special
|
253
|
+
parse_compare(%{<top name="A&amp;Z">This is &lt;some&gt; text.</top>},
|
254
|
+
[[:start_element, :top],
|
255
|
+
[:attr, :name, 'A&Z'],
|
256
|
+
[:text, "This is <some> text."],
|
257
|
+
[:end_element, :top]
|
258
|
+
], AllSax, true)
|
259
|
+
end
|
260
|
+
|
248
261
|
def test_sax_text_no_term
|
249
262
|
parse_compare(%{<top>This is some text.},
|
250
263
|
[[:start_element, :top],
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 1
|
7
7
|
- 3
|
8
|
-
- 2
|
9
|
-
version: 1.3.2
|
8
|
+
- 3
|
9
|
+
version: 1.3.3
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Peter Ohler
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-09-
|
17
|
+
date: 2011-09-24 00:00:00 +09:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|
@@ -82,6 +82,7 @@ files:
|
|
82
82
|
- test/perf_sax.rb
|
83
83
|
- test/perf_write.rb
|
84
84
|
- test/sample.rb
|
85
|
+
- test/sax_example.rb
|
85
86
|
- test/sax_test.rb
|
86
87
|
- test/test.rb
|
87
88
|
- test/Sample.graffle
|